1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14  MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
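   Example Usage (a minimal sketch; m, n, d_nz, and o_nz stand for application-specific local sizes
   and per-row preallocation counts, and error checking is omitted for brevity):
.vb
     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE);
     MatSetType(A,MATAIJ);
     MatSeqAIJSetPreallocation(A,d_nz,NULL);
     MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);
.ve
   Only the preallocation routine that matches the communicator size takes effect; the other call is
   ignored, which is why calling both is safe.
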
21   Developer Notes:
22     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL; the matrix type also automatically switches over to use inodes when
23    enough exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
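   Example Usage (a sketch; the matrix A and the counts d_nz and o_nz are assumed to be provided by the
   application, as in the MATAIJ example above):
.vb
     MatSetType(A,MATAIJCRL);
     MatSeqAIJSetPreallocation(A,d_nz,NULL);
     MatMPIAIJSetPreallocation(A,d_nz,NULL,o_nz,NULL);
.ve
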
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
66 {
67   PetscErrorCode ierr;
68   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
69 
70   PetscFunctionBegin;
71   if (mat->A) {
72     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
73     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
74   }
75   PetscFunctionReturn(0);
76 }
77 
78 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
79 {
80   PetscErrorCode  ierr;
81   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
82   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
83   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
84   const PetscInt  *ia,*ib;
85   const MatScalar *aa,*bb,*aav,*bav;
86   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
87   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
88 
89   PetscFunctionBegin;
90   *keptrows = NULL;
91 
92   ia   = a->i;
93   ib   = b->i;
94   ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr);
95   ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr);
96   for (i=0; i<m; i++) {
97     na = ia[i+1] - ia[i];
98     nb = ib[i+1] - ib[i];
99     if (!na && !nb) {
100       cnt++;
101       goto ok1;
102     }
103     aa = aav + ia[i];
104     for (j=0; j<na; j++) {
105       if (aa[j] != 0.0) goto ok1;
106     }
107     bb = bav + ib[i];
108     for (j=0; j <nb; j++) {
109       if (bb[j] != 0.0) goto ok1;
110     }
111     cnt++;
112 ok1:;
113   }
114   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRMPI(ierr);
115   if (!n0rows) {
116     ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
117     ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
118     PetscFunctionReturn(0);
119   }
120   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
121   cnt  = 0;
122   for (i=0; i<m; i++) {
123     na = ia[i+1] - ia[i];
124     nb = ib[i+1] - ib[i];
125     if (!na && !nb) continue;
126     aa = aav + ia[i];
127     for (j=0; j<na;j++) {
128       if (aa[j] != 0.0) {
129         rows[cnt++] = rstart + i;
130         goto ok2;
131       }
132     }
133     bb = bav + ib[i];
134     for (j=0; j<nb; j++) {
135       if (bb[j] != 0.0) {
136         rows[cnt++] = rstart + i;
137         goto ok2;
138       }
139     }
140 ok2:;
141   }
142   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
143   ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
144   ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
145   PetscFunctionReturn(0);
146 }
147 
148 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
149 {
150   PetscErrorCode    ierr;
151   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
152   PetscBool         cong;
153 
154   PetscFunctionBegin;
155   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
156   if (Y->assembled && cong) {
157     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
158   } else {
159     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
160   }
161   PetscFunctionReturn(0);
162 }
163 
164 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
165 {
166   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
167   PetscErrorCode ierr;
168   PetscInt       i,rstart,nrows,*rows;
169 
170   PetscFunctionBegin;
171   *zrows = NULL;
172   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
173   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
174   for (i=0; i<nrows; i++) rows[i] += rstart;
175   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
176   PetscFunctionReturn(0);
177 }
178 
179 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
180 {
181   PetscErrorCode    ierr;
182   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
183   PetscInt          i,n,*garray = aij->garray;
184   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
185   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
186   PetscReal         *work;
187   const PetscScalar *dummy;
188 
189   PetscFunctionBegin;
190   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
191   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
192   ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr);
193   ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr);
194   ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr);
195   ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr);
196   if (type == NORM_2) {
197     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
198       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
199     }
200     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
201       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
202     }
203   } else if (type == NORM_1) {
204     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
205       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
206     }
207     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
208       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
209     }
210   } else if (type == NORM_INFINITY) {
211     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
212       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
213     }
214     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
215       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
216     }
217 
218   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
219   if (type == NORM_INFINITY) {
220     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
221   } else {
222     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
223   }
224   ierr = PetscFree(work);CHKERRQ(ierr);
225   if (type == NORM_2) {
226     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
227   }
228   PetscFunctionReturn(0);
229 }
230 
231 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
232 {
233   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
234   IS              sis,gis;
235   PetscErrorCode  ierr;
236   const PetscInt  *isis,*igis;
237   PetscInt        n,*iis,nsis,ngis,rstart,i;
238 
239   PetscFunctionBegin;
240   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
241   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
242   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
243   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
244   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
245   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
246 
247   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
248   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
249   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
250   n    = ngis + nsis;
251   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
252   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
253   for (i=0; i<n; i++) iis[i] += rstart;
254   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
255 
256   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
257   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
258   ierr = ISDestroy(&sis);CHKERRQ(ierr);
259   ierr = ISDestroy(&gis);CHKERRQ(ierr);
260   PetscFunctionReturn(0);
261 }
262 
263 /*
264   Local utility routine that creates a mapping from the global column
265 number to the local number in the off-diagonal part of the local
266 storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable, at
267 a slightly higher hash table cost; without it, it is not scalable (each process
268 has an order-N integer array) but access is fast.
269 */
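/*
  For example (a hypothetical layout): suppose the off-diagonal block B on this process references the
  global columns garray[] = {3, 17, 42}.  The colmap built below answers "which local column of B holds
  global column g?", mapping 3 -> 1, 17 -> 2, and 42 -> 3; values are stored shifted up by one so that 0
  can mean "not present", and with PETSC_USE_CTABLE the keys are also shifted by one because a zero key
  is not allowed in the table.
*/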
270 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
271 {
272   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
273   PetscErrorCode ierr;
274   PetscInt       n = aij->B->cmap->n,i;
275 
276   PetscFunctionBegin;
277   if (n && !aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
278 #if defined(PETSC_USE_CTABLE)
279   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
280   for (i=0; i<n; i++) {
281     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
282   }
283 #else
284   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
285   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
286   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
287 #endif
288   PetscFunctionReturn(0);
289 }
290 
291 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
292 { \
293     if (col <= lastcol1)  low1 = 0;     \
294     else                 high1 = nrow1; \
295     lastcol1 = col;\
296     while (high1-low1 > 5) { \
297       t = (low1+high1)/2; \
298       if (rp1[t] > col) high1 = t; \
299       else              low1  = t; \
300     } \
301       for (_i=low1; _i<high1; _i++) { \
302         if (rp1[_i] > col) break; \
303         if (rp1[_i] == col) { \
304           if (addv == ADD_VALUES) { \
305             ap1[_i] += value;   \
306             /* Not sure whether PetscLogFlops() will slow down the code or not */ \
307             (void)PetscLogFlops(1.0);   \
308            } \
309           else                    ap1[_i] = value; \
310           inserted = PETSC_TRUE; \
311           goto a_noinsert; \
312         } \
313       }  \
314       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
315       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
316       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
317       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
318       N = nrow1++ - 1; a->nz++; high1++; \
319       /* shift up all the later entries in this row */ \
320       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
321       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
322       rp1[_i] = col;  \
323       ap1[_i] = value;  \
324       A->nonzerostate++;\
325       a_noinsert: ; \
326       ailen[row] = nrow1; \
327 }
328 
329 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
330   { \
331     if (col <= lastcol2) low2 = 0;                        \
332     else high2 = nrow2;                                   \
333     lastcol2 = col;                                       \
334     while (high2-low2 > 5) {                              \
335       t = (low2+high2)/2;                                 \
336       if (rp2[t] > col) high2 = t;                        \
337       else             low2  = t;                         \
338     }                                                     \
339     for (_i=low2; _i<high2; _i++) {                       \
340       if (rp2[_i] > col) break;                           \
341       if (rp2[_i] == col) {                               \
342         if (addv == ADD_VALUES) {                         \
343           ap2[_i] += value;                               \
344           (void)PetscLogFlops(1.0);                       \
345         }                                                 \
346         else                    ap2[_i] = value;          \
347         inserted = PETSC_TRUE;                            \
348         goto b_noinsert;                                  \
349       }                                                   \
350     }                                                     \
351     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
352     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
353     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
354     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
355     N = nrow2++ - 1; b->nz++; high2++;                    \
356     /* shift up all the later entries in this row */      \
357     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
358     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
359     rp2[_i] = col;                                        \
360     ap2[_i] = value;                                      \
361     B->nonzerostate++;                                    \
362     b_noinsert: ;                                         \
363     bilen[row] = nrow2;                                   \
364   }
365 
366 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
367 {
368   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
369   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
370   PetscErrorCode ierr;
371   PetscInt       l,*garray = mat->garray,diag;
372 
373   PetscFunctionBegin;
374   /* code only works for square matrices A */
375 
376   /* find size of row to the left of the diagonal part */
377   ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
378   row  = row - diag;
379   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
380     if (garray[b->j[b->i[row]+l]] > diag) break;
381   }
382   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
383 
384   /* diagonal part */
385   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
386 
387   /* right of diagonal part */
388   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
389 #if defined(PETSC_HAVE_DEVICE)
390   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
391 #endif
392   PetscFunctionReturn(0);
393 }
394 
395 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
396 {
397   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
398   PetscScalar    value = 0.0;
399   PetscErrorCode ierr;
400   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
401   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
402   PetscBool      roworiented = aij->roworiented;
403 
404   /* Some Variables required in the macro */
405   Mat        A                    = aij->A;
406   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
407   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
408   PetscBool  ignorezeroentries    = a->ignorezeroentries;
409   Mat        B                    = aij->B;
410   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
411   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
412   MatScalar  *aa,*ba;
413   /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
414    * cannot use "#if defined" inside a macro. */
415   PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
416 
417   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
418   PetscInt  nonew;
419   MatScalar *ap1,*ap2;
420 
421   PetscFunctionBegin;
422 #if defined(PETSC_HAVE_DEVICE)
423   if (A->offloadmask == PETSC_OFFLOAD_GPU) {
424     const PetscScalar *dummy;
425     ierr = MatSeqAIJGetArrayRead(A,&dummy);CHKERRQ(ierr);
426     ierr = MatSeqAIJRestoreArrayRead(A,&dummy);CHKERRQ(ierr);
427   }
428   if (B->offloadmask == PETSC_OFFLOAD_GPU) {
429     const PetscScalar *dummy;
430     ierr = MatSeqAIJGetArrayRead(B,&dummy);CHKERRQ(ierr);
431     ierr = MatSeqAIJRestoreArrayRead(B,&dummy);CHKERRQ(ierr);
432   }
433 #endif
434   aa = a->a;
435   ba = b->a;
436   for (i=0; i<m; i++) {
437     if (im[i] < 0) continue;
438     if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
439     if (im[i] >= rstart && im[i] < rend) {
440       row      = im[i] - rstart;
441       lastcol1 = -1;
442       rp1      = aj + ai[row];
443       ap1      = aa + ai[row];
444       rmax1    = aimax[row];
445       nrow1    = ailen[row];
446       low1     = 0;
447       high1    = nrow1;
448       lastcol2 = -1;
449       rp2      = bj + bi[row];
450       ap2      = ba + bi[row];
451       rmax2    = bimax[row];
452       nrow2    = bilen[row];
453       low2     = 0;
454       high2    = nrow2;
455 
456       for (j=0; j<n; j++) {
457         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
458         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
459         if (in[j] >= cstart && in[j] < cend) {
460           col   = in[j] - cstart;
461           nonew = a->nonew;
462           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
463 #if defined(PETSC_HAVE_DEVICE)
464           if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
465 #endif
466         } else if (in[j] < 0) continue;
467         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
468         else {
469           if (mat->was_assembled) {
470             if (!aij->colmap) {
471               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
472             }
473 #if defined(PETSC_USE_CTABLE)
474             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
475             col--;
476 #else
477             col = aij->colmap[in[j]] - 1;
478 #endif
479             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
480               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
481               col  =  in[j];
482               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
483               B        = aij->B;
484               b        = (Mat_SeqAIJ*)B->data;
485               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
486               rp2      = bj + bi[row];
487               ap2      = ba + bi[row];
488               rmax2    = bimax[row];
489               nrow2    = bilen[row];
490               low2     = 0;
491               high2    = nrow2;
492               bm       = aij->B->rmap->n;
493               ba       = b->a;
494               inserted = PETSC_FALSE;
495             } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
496               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
497                 ierr = PetscInfo3(mat,"Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
498               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
499             }
500           } else col = in[j];
501           nonew = b->nonew;
502           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
503 #if defined(PETSC_HAVE_DEVICE)
504           if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
505 #endif
506         }
507       }
508     } else {
509       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
510       if (!aij->donotstash) {
511         mat->assembled = PETSC_FALSE;
512         if (roworiented) {
513           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
514         } else {
515           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
516         }
517       }
518     }
519   }
520   PetscFunctionReturn(0);
521 }
522 
523 /*
524     This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
525     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
526     No off-processor parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
527 */
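/*
   For example (hypothetical numbers): on a process owning the global column range [cstart,cend) = [4,8),
   a row with global column indices {2, 5, 7, 9} is split into a diagonal part {1, 3} (indices shifted by
   cstart, so ailen for that row becomes 2) and an off-diagonal part {2, 9} (kept in global numbering, so
   bilen becomes 2).
*/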
528 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
529 {
530   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
531   Mat            A           = aij->A; /* diagonal part of the matrix */
532   Mat            B           = aij->B; /* offdiagonal part of the matrix */
533   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
534   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
535   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
536   PetscInt       *ailen      = a->ilen,*aj = a->j;
537   PetscInt       *bilen      = b->ilen,*bj = b->j;
538   PetscInt       am          = aij->A->rmap->n,j;
539   PetscInt       diag_so_far = 0,dnz;
540   PetscInt       offd_so_far = 0,onz;
541 
542   PetscFunctionBegin;
543   /* Iterate over all rows of the matrix */
544   for (j=0; j<am; j++) {
545     dnz = onz = 0;
546     /*  Iterate over all non-zero columns of the current row */
547     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
548       /* If column is in the diagonal */
549       if (mat_j[col] >= cstart && mat_j[col] < cend) {
550         aj[diag_so_far++] = mat_j[col] - cstart;
551         dnz++;
552       } else { /* off-diagonal entries */
553         bj[offd_so_far++] = mat_j[col];
554         onz++;
555       }
556     }
557     ailen[j] = dnz;
558     bilen[j] = onz;
559   }
560   PetscFunctionReturn(0);
561 }
562 
563 /*
564     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
565     The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
566     No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
567     Also, mat->was_assembled has to be PETSC_FALSE; otherwise the assignment aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
568     would not be valid and the more complex MatSetValues_MPIAIJ() has to be used.
569 */
570 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
571 {
572   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
573   Mat            A      = aij->A; /* diagonal part of the matrix */
574   Mat            B      = aij->B; /* offdiagonal part of the matrix */
575   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
576   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
577   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
578   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
579   PetscInt       *ailen = a->ilen,*aj = a->j;
580   PetscInt       *bilen = b->ilen,*bj = b->j;
581   PetscInt       am     = aij->A->rmap->n,j;
582   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
583   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
584   PetscScalar    *aa = a->a,*ba = b->a;
585 
586   PetscFunctionBegin;
587   /* Iterate over all rows of the matrix */
588   for (j=0; j<am; j++) {
589     dnz_row = onz_row = 0;
590     rowstart_offd = full_offd_i[j];
591     rowstart_diag = full_diag_i[j];
592     /*  Iterate over all non-zero columns of the current row */
593     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
594       /* If column is in the diagonal */
595       if (mat_j[col] >= cstart && mat_j[col] < cend) {
596         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
597         aa[rowstart_diag+dnz_row] = mat_a[col];
598         dnz_row++;
599       } else { /* off-diagonal entries */
600         bj[rowstart_offd+onz_row] = mat_j[col];
601         ba[rowstart_offd+onz_row] = mat_a[col];
602         onz_row++;
603       }
604     }
605     ailen[j] = dnz_row;
606     bilen[j] = onz_row;
607   }
608   PetscFunctionReturn(0);
609 }
610 
611 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
612 {
613   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
614   PetscErrorCode ierr;
615   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
616   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
617 
618   PetscFunctionBegin;
619   for (i=0; i<m; i++) {
620     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
621     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
622     if (idxm[i] >= rstart && idxm[i] < rend) {
623       row = idxm[i] - rstart;
624       for (j=0; j<n; j++) {
625         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
626         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
627         if (idxn[j] >= cstart && idxn[j] < cend) {
628           col  = idxn[j] - cstart;
629           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
630         } else {
631           if (!aij->colmap) {
632             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
633           }
634 #if defined(PETSC_USE_CTABLE)
635           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
636           col--;
637 #else
638           col = aij->colmap[idxn[j]] - 1;
639 #endif
640           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
641           else {
642             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
643           }
644         }
645       }
646     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
647   }
648   PetscFunctionReturn(0);
649 }
650 
651 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
652 {
653   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
654   PetscErrorCode ierr;
655   PetscInt       nstash,reallocs;
656 
657   PetscFunctionBegin;
658   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
659 
660   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
661   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
662   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
663   PetscFunctionReturn(0);
664 }
665 
666 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
667 {
668   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
669   PetscErrorCode ierr;
670   PetscMPIInt    n;
671   PetscInt       i,j,rstart,ncols,flg;
672   PetscInt       *row,*col;
673   PetscBool      other_disassembled;
674   PetscScalar    *val;
675 
676   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
677 
678   PetscFunctionBegin;
679   if (!aij->donotstash && !mat->nooffprocentries) {
680     while (1) {
681       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
682       if (!flg) break;
683 
684       for (i=0; i<n;) {
685         /* Now identify the consecutive vals belonging to the same row */
686         for (j=i,rstart=row[j]; j<n; j++) {
687           if (row[j] != rstart) break;
688         }
689         if (j < n) ncols = j-i;
690         else       ncols = n-i;
691         /* Now assemble all these values with a single function call */
692         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
693         i    = j;
694       }
695     }
696     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
697   }
698 #if defined(PETSC_HAVE_DEVICE)
699   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
700   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
701   if (mat->boundtocpu) {
702     ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
703     ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
704   }
705 #endif
706   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
707   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
708 
709   /* determine if any processor has disassembled; if so, we must
710      also disassemble ourselves so that we may reassemble. */
711   /*
712      if the nonzero structure of submatrix B cannot change then we know that
713      no processor disassembled, thus we can skip this step
714   */
715   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
716     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
717     if (mat->was_assembled && !other_disassembled) {
718 #if defined(PETSC_HAVE_DEVICE)
719       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
720 #endif
721       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
722     }
723   }
724   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
725     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
726   }
727   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
728 #if defined(PETSC_HAVE_DEVICE)
729   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
730 #endif
731   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
732   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
733 
734   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
735 
736   aij->rowvalues = NULL;
737 
738   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
739 
740   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
741   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
742     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
743     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
744   }
745 #if defined(PETSC_HAVE_DEVICE)
746   mat->offloadmask = PETSC_OFFLOAD_BOTH;
747 #endif
748   PetscFunctionReturn(0);
749 }
750 
751 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
752 {
753   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
754   PetscErrorCode ierr;
755 
756   PetscFunctionBegin;
757   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
758   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
759   PetscFunctionReturn(0);
760 }
761 
762 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
763 {
764   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
765   PetscObjectState sA, sB;
766   PetscInt        *lrows;
767   PetscInt         r, len;
768   PetscBool        cong, lch, gch;
769   PetscErrorCode   ierr;
770 
771   PetscFunctionBegin;
772   /* get locally owned rows */
773   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
774   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
775   /* fix right hand side if needed */
776   if (x && b) {
777     const PetscScalar *xx;
778     PetscScalar       *bb;
779 
780     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
781     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
782     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
783     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
784     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
785     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
786   }
787 
788   sA = mat->A->nonzerostate;
789   sB = mat->B->nonzerostate;
790 
791   if (diag != 0.0 && cong) {
792     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
793     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
794   } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
795     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
796     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
797     PetscInt   nnwA, nnwB;
798     PetscBool  nnzA, nnzB;
799 
800     nnwA = aijA->nonew;
801     nnwB = aijB->nonew;
802     nnzA = aijA->keepnonzeropattern;
803     nnzB = aijB->keepnonzeropattern;
804     if (!nnzA) {
805       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
806       aijA->nonew = 0;
807     }
808     if (!nnzB) {
809       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
810       aijB->nonew = 0;
811     }
812     /* Must zero here before the next loop */
813     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
814     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
815     for (r = 0; r < len; ++r) {
816       const PetscInt row = lrows[r] + A->rmap->rstart;
817       if (row >= A->cmap->N) continue;
818       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
819     }
820     aijA->nonew = nnwA;
821     aijB->nonew = nnwB;
822   } else {
823     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
824     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
825   }
826   ierr = PetscFree(lrows);CHKERRQ(ierr);
827   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
828   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
829 
830   /* reduce nonzerostate */
831   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
832   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
833   if (gch) A->nonzerostate++;
834   PetscFunctionReturn(0);
835 }
836 
837 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
838 {
839   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
840   PetscErrorCode    ierr;
841   PetscMPIInt       n = A->rmap->n;
842   PetscInt          i,j,r,m,len = 0;
843   PetscInt          *lrows,*owners = A->rmap->range;
844   PetscMPIInt       p = 0;
845   PetscSFNode       *rrows;
846   PetscSF           sf;
847   const PetscScalar *xx;
848   PetscScalar       *bb,*mask;
849   Vec               xmask,lmask;
850   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
851   const PetscInt    *aj, *ii,*ridx;
852   PetscScalar       *aa;
853 
854   PetscFunctionBegin;
855   /* Create SF where leaves are input rows and roots are owned rows */
856   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
857   for (r = 0; r < n; ++r) lrows[r] = -1;
858   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
859   for (r = 0; r < N; ++r) {
860     const PetscInt idx   = rows[r];
861     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
862     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
863       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
864     }
865     rrows[r].rank  = p;
866     rrows[r].index = rows[r] - owners[p];
867   }
868   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
869   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
870   /* Collect flags for rows to be zeroed */
871   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
872   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
873   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
874   /* Compress and put in row numbers */
875   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
876   /* zero diagonal part of matrix */
877   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
878   /* handle off diagonal part of matrix */
879   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
880   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
881   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
882   for (i=0; i<len; i++) bb[lrows[i]] = 1;
883   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
884   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
885   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
887   if (x && b) { /* this code is buggy when the row and column layout don't match */
888     PetscBool cong;
889 
890     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
891     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
892     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
893     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
894     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
895     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
896   }
897   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
898   /* remove zeroed rows of off diagonal matrix */
899   ii = aij->i;
900   for (i=0; i<len; i++) {
901     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
902   }
903   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
904   if (aij->compressedrow.use) {
905     m    = aij->compressedrow.nrows;
906     ii   = aij->compressedrow.i;
907     ridx = aij->compressedrow.rindex;
908     for (i=0; i<m; i++) {
909       n  = ii[i+1] - ii[i];
910       aj = aij->j + ii[i];
911       aa = aij->a + ii[i];
912 
913       for (j=0; j<n; j++) {
914         if (PetscAbsScalar(mask[*aj])) {
915           if (b) bb[*ridx] -= *aa*xx[*aj];
916           *aa = 0.0;
917         }
918         aa++;
919         aj++;
920       }
921       ridx++;
922     }
923   } else { /* do not use compressed row format */
924     m = l->B->rmap->n;
925     for (i=0; i<m; i++) {
926       n  = ii[i+1] - ii[i];
927       aj = aij->j + ii[i];
928       aa = aij->a + ii[i];
929       for (j=0; j<n; j++) {
930         if (PetscAbsScalar(mask[*aj])) {
931           if (b) bb[i] -= *aa*xx[*aj];
932           *aa = 0.0;
933         }
934         aa++;
935         aj++;
936       }
937     }
938   }
939   if (x && b) {
940     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
941     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
942   }
943   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
944   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
945   ierr = PetscFree(lrows);CHKERRQ(ierr);
946 
947   /* only change matrix nonzero state if pattern was allowed to be changed */
948   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
949     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
950     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
951   }
952   PetscFunctionReturn(0);
953 }
954 
955 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
956 {
957   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
958   PetscErrorCode ierr;
959   PetscInt       nt;
960   VecScatter     Mvctx = a->Mvctx;
961 
962   PetscFunctionBegin;
963   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
964   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
965   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
966   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
967   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
968   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
969   PetscFunctionReturn(0);
970 }
971 
972 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
973 {
974   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
975   PetscErrorCode ierr;
976 
977   PetscFunctionBegin;
978   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
979   PetscFunctionReturn(0);
980 }
981 
982 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
983 {
984   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
985   PetscErrorCode ierr;
986   VecScatter     Mvctx = a->Mvctx;
987 
988   PetscFunctionBegin;
989   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
990   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
991   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
992   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
993   PetscFunctionReturn(0);
994 }
995 
996 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
997 {
998   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
999   PetscErrorCode ierr;
1000 
1001   PetscFunctionBegin;
1002   /* do nondiagonal part */
1003   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1004   /* do local part */
1005   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1006   /* add partial results together */
1007   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1008   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1013 {
1014   MPI_Comm       comm;
1015   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1016   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1017   IS             Me,Notme;
1018   PetscErrorCode ierr;
1019   PetscInt       M,N,first,last,*notme,i;
1020   PetscBool      lf;
1021   PetscMPIInt    size;
1022 
1023   PetscFunctionBegin;
1024   /* Easy test: symmetric diagonal block */
1025   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1026   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1027   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRMPI(ierr);
1028   if (!*f) PetscFunctionReturn(0);
1029   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1030   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
1031   if (size == 1) PetscFunctionReturn(0);
1032 
1033   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1034   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1035   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1036   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1037   for (i=0; i<first; i++) notme[i] = i;
1038   for (i=last; i<M; i++) notme[i-last+first] = i;
1039   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1040   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1041   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1042   Aoff = Aoffs[0];
1043   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1044   Boff = Boffs[0];
1045   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1046   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1047   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1048   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1049   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1050   ierr = PetscFree(notme);CHKERRQ(ierr);
1051   PetscFunctionReturn(0);
1052 }
1053 
1054 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1055 {
1056   PetscErrorCode ierr;
1057 
1058   PetscFunctionBegin;
1059   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1060   PetscFunctionReturn(0);
1061 }
1062 
1063 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1064 {
1065   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1066   PetscErrorCode ierr;
1067 
1068   PetscFunctionBegin;
1069   /* do nondiagonal part */
1070   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1071   /* do local part */
1072   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1073   /* add partial results together */
1074   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1075   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 /*
1080   This only works correctly for square matrices where the subblock A->A is the
1081    diagonal block
1082 */
1083 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1084 {
1085   PetscErrorCode ierr;
1086   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1087 
1088   PetscFunctionBegin;
1089   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1090   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1091   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1096 {
1097   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1098   PetscErrorCode ierr;
1099 
1100   PetscFunctionBegin;
1101   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1102   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1107 {
1108   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1109   PetscErrorCode ierr;
1110 
1111   PetscFunctionBegin;
1112 #if defined(PETSC_USE_LOG)
1113   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1114 #endif
1115   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1116   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1117   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1118   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1119 #if defined(PETSC_USE_CTABLE)
1120   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1121 #else
1122   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1123 #endif
1124   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1125   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1126   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1127   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1128   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1129   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1130 
1131   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1132   ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
1133 
1134   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1135   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1136   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1137   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1138   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1139   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1140   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1141   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1142   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1143   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1144 #if defined(PETSC_HAVE_CUDA)
1145   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
1146 #endif
1147 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1148   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr);
1149 #endif
1150   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL);CHKERRQ(ierr);
1151 #if defined(PETSC_HAVE_ELEMENTAL)
1152   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1153 #endif
1154 #if defined(PETSC_HAVE_SCALAPACK)
1155   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
1156 #endif
1157 #if defined(PETSC_HAVE_HYPRE)
1158   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1159   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1160 #endif
1161   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1162   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1163   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1164   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr);
1165   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr);
1166   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr);
1167 #if defined(PETSC_HAVE_MKL_SPARSE)
1168   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr);
1169 #endif
1170   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr);
1171   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1172   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr);
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1177 {
1178   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1179   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1180   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1181   const PetscInt    *garray = aij->garray;
1182   const PetscScalar *aa,*ba;
1183   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1184   PetscInt          *rowlens;
1185   PetscInt          *colidxs;
1186   PetscScalar       *matvals;
1187   PetscErrorCode    ierr;
1188 
1189   PetscFunctionBegin;
1190   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1191 
1192   M  = mat->rmap->N;
1193   N  = mat->cmap->N;
1194   m  = mat->rmap->n;
1195   rs = mat->rmap->rstart;
1196   cs = mat->cmap->rstart;
1197   nz = A->nz + B->nz;
1198 
1199   /* write matrix header */
1200   header[0] = MAT_FILE_CLASSID;
1201   header[1] = M; header[2] = N; header[3] = nz;
1202   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1203   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1204 
1205   /* fill in and store row lengths  */
1206   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1207   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1208   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1209   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1210 
1211   /* fill in and store column indices */
1212   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1213   for (cnt=0, i=0; i<m; i++) {
1214     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1215       if (garray[B->j[jb]] > cs) break;
1216       colidxs[cnt++] = garray[B->j[jb]];
1217     }
1218     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1219       colidxs[cnt++] = A->j[ja] + cs;
1220     for (; jb<B->i[i+1]; jb++)
1221       colidxs[cnt++] = garray[B->j[jb]];
1222   }
1223   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1224   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1225   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1226 
1227   /* fill in and store nonzero values */
1228   ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr);
1229   ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr);
1230   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1231   for (cnt=0, i=0; i<m; i++) {
1232     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1233       if (garray[B->j[jb]] > cs) break;
1234       matvals[cnt++] = ba[jb];
1235     }
1236     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1237       matvals[cnt++] = aa[ja];
1238     for (; jb<B->i[i+1]; jb++)
1239       matvals[cnt++] = ba[jb];
1240   }
1241   ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr);
1242   ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr);
1243   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1244   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1245   ierr = PetscFree(matvals);CHKERRQ(ierr);
1246 
1247   /* write block size option to the viewer's .info file */
1248   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #include <petscdraw.h>
1253 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1254 {
1255   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1256   PetscErrorCode    ierr;
1257   PetscMPIInt       rank = aij->rank,size = aij->size;
1258   PetscBool         isdraw,iascii,isbinary;
1259   PetscViewer       sviewer;
1260   PetscViewerFormat format;
1261 
1262   PetscFunctionBegin;
1263   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1264   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1265   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1266   if (iascii) {
1267     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1268     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1269       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1270       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1271       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1272       for (i=0; i<(PetscInt)size; i++) {
1273         nmax = PetscMax(nmax,nz[i]);
1274         nmin = PetscMin(nmin,nz[i]);
1275         navg += nz[i];
1276       }
1277       ierr = PetscFree(nz);CHKERRQ(ierr);
1278       navg = navg/size;
1279       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1280       PetscFunctionReturn(0);
1281     }
1282     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1283     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1284       MatInfo   info;
1285       PetscInt *inodes=NULL;
1286 
1287       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
1288       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1289       ierr = MatInodeGetInodeSizes(aij->A,NULL,&inodes,NULL);CHKERRQ(ierr);
1290       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1291       if (!inodes) {
1292         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1293                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1294       } else {
1295         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1296                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1297       }
1298       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1299       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1300       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1301       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1302       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1303       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1304       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1305       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1306       PetscFunctionReturn(0);
1307     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1308       PetscInt inodecount,inodelimit,*inodes;
1309       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1310       if (inodes) {
1311         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1312       } else {
1313         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1314       }
1315       PetscFunctionReturn(0);
1316     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1317       PetscFunctionReturn(0);
1318     }
1319   } else if (isbinary) {
1320     if (size == 1) {
1321       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1322       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1323     } else {
1324       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1325     }
1326     PetscFunctionReturn(0);
1327   } else if (iascii && size == 1) {
1328     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1329     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1330     PetscFunctionReturn(0);
1331   } else if (isdraw) {
1332     PetscDraw draw;
1333     PetscBool isnull;
1334     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1335     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1336     if (isnull) PetscFunctionReturn(0);
1337   }
1338 
1339   { /* assemble the entire matrix onto first processor */
1340     Mat A = NULL, Av;
1341     IS  isrow,iscol;
1342 
1343     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1344     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1345     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1346     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1347 /*  The commented-out code below assembles the same submatrix using MatCreateSubMatrices() instead */
1348 /*
1349     Mat *AA, A = NULL, Av;
1350     IS  isrow,iscol;
1351 
1352     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1353     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1354     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1355     if (!rank) {
1356        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1357        A    = AA[0];
1358        Av   = AA[0];
1359     }
1360     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1361 */
1362     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1363     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1364     /*
1365        Every process has to participate in drawing the matrix since the graphics waits are
1366        synchronized across all processes that share the PetscDraw object
1367     */
1368     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1369     if (!rank) {
1370       if (((PetscObject)mat)->name) {
1371         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1372       }
1373       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1374     }
1375     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1376     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1377     ierr = MatDestroy(&A);CHKERRQ(ierr);
1378   }
1379   PetscFunctionReturn(0);
1380 }
1381 
1382 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1383 {
1384   PetscErrorCode ierr;
1385   PetscBool      iascii,isdraw,issocket,isbinary;
1386 
1387   PetscFunctionBegin;
1388   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1389   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1390   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1391   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1392   if (iascii || isdraw || isbinary || issocket) {
1393     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1394   }
1395   PetscFunctionReturn(0);
1396 }
1397 
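/*
   Illustrative sketch (not part of this source): viewing an assembled MATMPIAIJ matrix with the
   ASCII formats handled above.  PETSC_VIEWER_ASCII_INFO prints the I-node information, while
   PETSC_VIEWER_LOAD_BALANCE prints the min/avg/max nonzero counts per process.

     ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_LOAD_BALANCE);CHKERRQ(ierr);
     ierr = MatView(mat,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
     ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
*/
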
1398 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1399 {
1400   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1401   PetscErrorCode ierr;
1402   Vec            bb1 = NULL;
1403   PetscBool      hasop;
1404 
1405   PetscFunctionBegin;
1406   if (flag == SOR_APPLY_UPPER) {
1407     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1408     PetscFunctionReturn(0);
1409   }
1410 
1411   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1412     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1413   }
1414 
1415   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1416     if (flag & SOR_ZERO_INITIAL_GUESS) {
1417       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1418       its--;
1419     }
1420 
1421     while (its--) {
1422       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1423       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1424 
1425       /* update rhs: bb1 = bb - B*x */
1426       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1427       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1428 
1429       /* local sweep */
1430       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1431     }
1432   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1433     if (flag & SOR_ZERO_INITIAL_GUESS) {
1434       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1435       its--;
1436     }
1437     while (its--) {
1438       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1439       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1440 
1441       /* update rhs: bb1 = bb - B*x */
1442       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1443       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1444 
1445       /* local sweep */
1446       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1447     }
1448   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1449     if (flag & SOR_ZERO_INITIAL_GUESS) {
1450       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1451       its--;
1452     }
1453     while (its--) {
1454       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1455       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1456 
1457       /* update rhs: bb1 = bb - B*x */
1458       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1459       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1460 
1461       /* local sweep */
1462       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1463     }
1464   } else if (flag & SOR_EISENSTAT) {
1465     Vec xx1;
1466 
1467     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1468     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1469 
1470     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1471     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1472     if (!mat->diag) {
1473       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1474       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1475     }
1476     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1477     if (hasop) {
1478       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1479     } else {
1480       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1481     }
1482     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1483 
1484     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1485 
1486     /* local sweep */
1487     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1488     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1489     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1490   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1491 
1492   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1493 
1494   matin->factorerrortype = mat->A->factorerrortype;
1495   PetscFunctionReturn(0);
1496 }
1497 
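/*
   Illustrative sketch (not part of this source): MatSOR() is normally reached through PCSOR,
   which in parallel drives the processor-local sweeps implemented above (a block-Jacobi-like
   SOR, not a true parallel SOR).  A hedged setup by hand, assuming ksp, b and x already exist:

     PC pc;
     ierr = KSPSetType(ksp,KSPRICHARDSON);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
     ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
*/
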
1498 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1499 {
1500   Mat            aA,aB,Aperm;
1501   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1502   PetscScalar    *aa,*ba;
1503   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1504   PetscSF        rowsf,sf;
1505   IS             parcolp = NULL;
1506   PetscBool      done;
1507   PetscErrorCode ierr;
1508 
1509   PetscFunctionBegin;
1510   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1511   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1512   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1513   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1514 
1515   /* Invert row permutation to find out where my rows should go */
1516   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1517   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1518   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1519   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1520   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1521   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPI_REPLACE);CHKERRQ(ierr);
1522 
1523   /* Invert column permutation to find out where my columns should go */
1524   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1525   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1526   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1527   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1528   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1529   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPI_REPLACE);CHKERRQ(ierr);
1530   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1531 
1532   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1533   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1534   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1535 
1536   /* Find out where my gcols should go */
1537   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1538   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1539   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1540   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1541   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1542   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
1543   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest,MPI_REPLACE);CHKERRQ(ierr);
1544   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1545 
1546   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1547   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1548   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1549   for (i=0; i<m; i++) {
1550     PetscInt    row = rdest[i];
1551     PetscMPIInt rowner;
1552     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1553     for (j=ai[i]; j<ai[i+1]; j++) {
1554       PetscInt    col = cdest[aj[j]];
1555       PetscMPIInt cowner;
1556       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1557       if (rowner == cowner) dnnz[i]++;
1558       else onnz[i]++;
1559     }
1560     for (j=bi[i]; j<bi[i+1]; j++) {
1561       PetscInt    col = gcdest[bj[j]];
1562       PetscMPIInt cowner;
1563       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1564       if (rowner == cowner) dnnz[i]++;
1565       else onnz[i]++;
1566     }
1567   }
1568   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
1569   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz,MPI_REPLACE);CHKERRQ(ierr);
1570   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
1571   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz,MPI_REPLACE);CHKERRQ(ierr);
1572   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1573 
1574   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1575   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1576   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1577   for (i=0; i<m; i++) {
1578     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1579     PetscInt j0,rowlen;
1580     rowlen = ai[i+1] - ai[i];
1581     for (j0=j=0; j<rowlen; j0=j) { /* rowlen could be larger than number of rows m, so sum in batches */
1582       for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1583       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1584     }
1585     rowlen = bi[i+1] - bi[i];
1586     for (j0=j=0; j<rowlen; j0=j) {
1587       for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1588       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1589     }
1590   }
1591   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1592   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1593   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1594   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1595   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1596   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1597   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1598   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1599   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1600   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1601   *B = Aperm;
1602   PetscFunctionReturn(0);
1603 }
1604 
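/*
   Illustrative sketch (not part of this source): calling MatPermute() on an MPIAIJ matrix.
   Each process supplies in rowp/colp the new global indices of the rows/columns it owns;
   the identity permutation built here with ISCreateStride() is only a placeholder.

     IS       rowp,colp;
     Mat      Aperm;
     PetscInt m,n,rstart,cstart;

     ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
     ierr = MatGetOwnershipRangeColumn(A,&cstart,NULL);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),m,rstart,1,&rowp);CHKERRQ(ierr);
     ierr = ISCreateStride(PetscObjectComm((PetscObject)A),n,cstart,1,&colp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
     ierr = ISDestroy(&rowp);CHKERRQ(ierr);
     ierr = ISDestroy(&colp);CHKERRQ(ierr);
*/
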
1605 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1606 {
1607   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1608   PetscErrorCode ierr;
1609 
1610   PetscFunctionBegin;
1611   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1612   if (ghosts) *ghosts = aij->garray;
1613   PetscFunctionReturn(0);
1614 }
1615 
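/*
   Illustrative sketch (not part of this source): the ghost list returned above (the garray of
   the off-diagonal block) can be used to create a ghosted vector compatible with the matrix
   column layout.

     PetscInt       n,nghost;
     const PetscInt *ghosts;
     Vec            x;

     ierr = MatGetLocalSize(mat,NULL,&n);CHKERRQ(ierr);
     ierr = MatGetGhosts(mat,&nghost,&ghosts);CHKERRQ(ierr);
     ierr = VecCreateGhost(PetscObjectComm((PetscObject)mat),n,PETSC_DECIDE,nghost,ghosts,&x);CHKERRQ(ierr);
*/
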
1616 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1617 {
1618   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1619   Mat            A    = mat->A,B = mat->B;
1620   PetscErrorCode ierr;
1621   PetscLogDouble isend[5],irecv[5];
1622 
1623   PetscFunctionBegin;
1624   info->block_size = 1.0;
1625   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1626 
1627   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1628   isend[3] = info->memory;  isend[4] = info->mallocs;
1629 
1630   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1631 
1632   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1633   isend[3] += info->memory;  isend[4] += info->mallocs;
1634   if (flag == MAT_LOCAL) {
1635     info->nz_used      = isend[0];
1636     info->nz_allocated = isend[1];
1637     info->nz_unneeded  = isend[2];
1638     info->memory       = isend[3];
1639     info->mallocs      = isend[4];
1640   } else if (flag == MAT_GLOBAL_MAX) {
1641     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1642 
1643     info->nz_used      = irecv[0];
1644     info->nz_allocated = irecv[1];
1645     info->nz_unneeded  = irecv[2];
1646     info->memory       = irecv[3];
1647     info->mallocs      = irecv[4];
1648   } else if (flag == MAT_GLOBAL_SUM) {
1649     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRMPI(ierr);
1650 
1651     info->nz_used      = irecv[0];
1652     info->nz_allocated = irecv[1];
1653     info->nz_unneeded  = irecv[2];
1654     info->memory       = irecv[3];
1655     info->mallocs      = irecv[4];
1656   }
1657   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1658   info->fill_ratio_needed = 0;
1659   info->factor_mallocs    = 0;
1660   PetscFunctionReturn(0);
1661 }
1662 
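/*
   Illustrative sketch (not part of this source): querying the storage statistics accumulated
   by the routine above; MAT_GLOBAL_SUM selects the MPI_SUM reduction branch.

     MatInfo info;
     ierr = MatGetInfo(A,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(PETSC_COMM_WORLD,"nz used %g, nz allocated %g, mallocs %g\n",
                        info.nz_used,info.nz_allocated,info.mallocs);CHKERRQ(ierr);
*/
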
1663 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1664 {
1665   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1666   PetscErrorCode ierr;
1667 
1668   PetscFunctionBegin;
1669   switch (op) {
1670   case MAT_NEW_NONZERO_LOCATIONS:
1671   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1672   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1673   case MAT_KEEP_NONZERO_PATTERN:
1674   case MAT_NEW_NONZERO_LOCATION_ERR:
1675   case MAT_USE_INODES:
1676   case MAT_IGNORE_ZERO_ENTRIES:
1677   case MAT_FORM_EXPLICIT_TRANSPOSE:
1678     MatCheckPreallocated(A,1);
1679     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1680     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1681     break;
1682   case MAT_ROW_ORIENTED:
1683     MatCheckPreallocated(A,1);
1684     a->roworiented = flg;
1685 
1686     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1687     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1688     break;
1689   case MAT_FORCE_DIAGONAL_ENTRIES:
1690   case MAT_SORTED_FULL:
1691     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1692     break;
1693   case MAT_IGNORE_OFF_PROC_ENTRIES:
1694     a->donotstash = flg;
1695     break;
1696   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1697   case MAT_SPD:
1698   case MAT_SYMMETRIC:
1699   case MAT_STRUCTURALLY_SYMMETRIC:
1700   case MAT_HERMITIAN:
1701   case MAT_SYMMETRY_ETERNAL:
1702     break;
1703   case MAT_SUBMAT_SINGLEIS:
1704     A->submat_singleis = flg;
1705     break;
1706   case MAT_STRUCTURE_ONLY:
1707     /* The option is handled directly by MatSetOption() */
1708     break;
1709   default:
1710     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1711   }
1712   PetscFunctionReturn(0);
1713 }
1714 
1715 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1716 {
1717   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1718   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1719   PetscErrorCode ierr;
1720   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1721   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1722   PetscInt       *cmap,*idx_p;
1723 
1724   PetscFunctionBegin;
1725   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1726   mat->getrowactive = PETSC_TRUE;
1727 
1728   if (!mat->rowvalues && (idx || v)) {
1729     /*
1730         allocate enough space to hold information from the longest row.
1731     */
1732     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1733     PetscInt   max = 1,tmp;
1734     for (i=0; i<matin->rmap->n; i++) {
1735       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1736       if (max < tmp) max = tmp;
1737     }
1738     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1739   }
1740 
1741   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1742   lrow = row - rstart;
1743 
1744   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1745   if (!v)   {pvA = NULL; pvB = NULL;}
1746   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1747   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1748   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1749   nztot = nzA + nzB;
1750 
1751   cmap = mat->garray;
1752   if (v  || idx) {
1753     if (nztot) {
1754       /* Sort by increasing column numbers, assuming A and B already sorted */
1755       PetscInt imark = -1;
1756       if (v) {
1757         *v = v_p = mat->rowvalues;
1758         for (i=0; i<nzB; i++) {
1759           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1760           else break;
1761         }
1762         imark = i;
1763         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1764         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1765       }
1766       if (idx) {
1767         *idx = idx_p = mat->rowindices;
1768         if (imark > -1) {
1769           for (i=0; i<imark; i++) {
1770             idx_p[i] = cmap[cworkB[i]];
1771           }
1772         } else {
1773           for (i=0; i<nzB; i++) {
1774             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1775             else break;
1776           }
1777           imark = i;
1778         }
1779         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1780         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1781       }
1782     } else {
1783       if (idx) *idx = NULL;
1784       if (v)   *v   = NULL;
1785     }
1786   }
1787   *nz  = nztot;
1788   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1789   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1790   PetscFunctionReturn(0);
1791 }
1792 
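/*
   Illustrative sketch (not part of this source): the usual access pattern served by
   MatGetRow_MPIAIJ()/MatRestoreRow_MPIAIJ(); only locally owned rows may be requested,
   and the returned column indices are global and sorted.

     PetscInt          row,rstart,rend,ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     for (row=rstart; row<rend; row++) {
       ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
       ... use cols[] and vals[] ...
       ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     }
*/
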
1793 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1794 {
1795   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1796 
1797   PetscFunctionBegin;
1798   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1799   aij->getrowactive = PETSC_FALSE;
1800   PetscFunctionReturn(0);
1801 }
1802 
1803 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1804 {
1805   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1806   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1807   PetscErrorCode ierr;
1808   PetscInt       i,j,cstart = mat->cmap->rstart;
1809   PetscReal      sum = 0.0;
1810   MatScalar      *v;
1811 
1812   PetscFunctionBegin;
1813   if (aij->size == 1) {
1814     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1815   } else {
1816     if (type == NORM_FROBENIUS) {
1817       v = amat->a;
1818       for (i=0; i<amat->nz; i++) {
1819         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1820       }
1821       v = bmat->a;
1822       for (i=0; i<bmat->nz; i++) {
1823         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1824       }
1825       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1826       *norm = PetscSqrtReal(*norm);
1827       ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
1828     } else if (type == NORM_1) { /* max column norm */
1829       PetscReal *tmp,*tmp2;
1830       PetscInt  *jj,*garray = aij->garray;
1831       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1832       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1833       *norm = 0.0;
1834       v     = amat->a; jj = amat->j;
1835       for (j=0; j<amat->nz; j++) {
1836         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1837       }
1838       v = bmat->a; jj = bmat->j;
1839       for (j=0; j<bmat->nz; j++) {
1840         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1841       }
1842       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1843       for (j=0; j<mat->cmap->N; j++) {
1844         if (tmp2[j] > *norm) *norm = tmp2[j];
1845       }
1846       ierr = PetscFree(tmp);CHKERRQ(ierr);
1847       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1848       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1849     } else if (type == NORM_INFINITY) { /* max row norm */
1850       PetscReal ntemp = 0.0;
1851       for (j=0; j<aij->A->rmap->n; j++) {
1852         v   = amat->a + amat->i[j];
1853         sum = 0.0;
1854         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1855           sum += PetscAbsScalar(*v); v++;
1856         }
1857         v = bmat->a + bmat->i[j];
1858         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1859           sum += PetscAbsScalar(*v); v++;
1860         }
1861         if (sum > ntemp) ntemp = sum;
1862       }
1863       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1864       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1865     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1866   }
1867   PetscFunctionReturn(0);
1868 }
1869 
1870 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1871 {
1872   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
1873   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
1874   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
1875   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
1876   PetscErrorCode  ierr;
1877   Mat             B,A_diag,*B_diag;
1878   const MatScalar *pbv,*bv;
1879 
1880   PetscFunctionBegin;
1881   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1882   ai = Aloc->i; aj = Aloc->j;
1883   bi = Bloc->i; bj = Bloc->j;
1884   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1885     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1886     PetscSFNode          *oloc;
1887     PETSC_UNUSED PetscSF sf;
1888 
1889     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1890     /* compute d_nnz for preallocation */
1891     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
1892     for (i=0; i<ai[ma]; i++) {
1893       d_nnz[aj[i]]++;
1894     }
1895     /* compute local off-diagonal contributions */
1896     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
1897     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1898     /* map those to global */
1899     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1900     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1901     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1902     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
1903     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1904     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1905     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1906 
1907     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1908     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1909     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1910     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1911     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1912     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1913   } else {
1914     B    = *matout;
1915     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1916   }
1917 
1918   b           = (Mat_MPIAIJ*)B->data;
1919   A_diag      = a->A;
1920   B_diag      = &b->A;
1921   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
1922   A_diag_ncol = A_diag->cmap->N;
1923   B_diag_ilen = sub_B_diag->ilen;
1924   B_diag_i    = sub_B_diag->i;
1925 
1926   /* Set ilen for diagonal of B */
1927   for (i=0; i<A_diag_ncol; i++) {
1928     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
1929   }
1930 
1931   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1932   very quickly (i.e., without using MatSetValues()), because all writes are local. */
1933   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
1934 
1935   /* copy over the B part */
1936   ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
1937   ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr);
1938   pbv  = bv;
1939   row  = A->rmap->rstart;
1940   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1941   cols_tmp = cols;
1942   for (i=0; i<mb; i++) {
1943     ncol = bi[i+1]-bi[i];
1944     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr);
1945     row++;
1946     pbv += ncol; cols_tmp += ncol;
1947   }
1948   ierr = PetscFree(cols);CHKERRQ(ierr);
1949   ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr);
1950 
1951   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1952   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1953   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1954     *matout = B;
1955   } else {
1956     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1957   }
1958   PetscFunctionReturn(0);
1959 }
1960 
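/*
   Illustrative sketch (not part of this source): the three reuse modes handled by the routine
   above.  The MAT_REUSE_MATRIX call assumes At was produced by an earlier MAT_INITIAL_MATRIX
   call and A's nonzero pattern has not changed.

     Mat At;
     ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);   -- create A^T
     ierr = MatTranspose(A,MAT_REUSE_MATRIX,&At);CHKERRQ(ierr);     -- refill an existing A^T
     ierr = MatTranspose(A,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr);    -- replace A by A^T
*/
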
1961 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1962 {
1963   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1964   Mat            a    = aij->A,b = aij->B;
1965   PetscErrorCode ierr;
1966   PetscInt       s1,s2,s3;
1967 
1968   PetscFunctionBegin;
1969   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1970   if (rr) {
1971     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1972     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1973     /* Overlap communication with computation. */
1974     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1975   }
1976   if (ll) {
1977     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1978     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1979     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1980   }
1981   /* scale  the diagonal block */
1982   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1983 
1984   if (rr) {
1985     /* Do a scatter end and then right scale the off-diagonal block */
1986     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1987     ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
1988   }
1989   PetscFunctionReturn(0);
1990 }
1991 
1992 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1993 {
1994   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1995   PetscErrorCode ierr;
1996 
1997   PetscFunctionBegin;
1998   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1999   PetscFunctionReturn(0);
2000 }
2001 
2002 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2003 {
2004   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2005   Mat            a,b,c,d;
2006   PetscBool      flg;
2007   PetscErrorCode ierr;
2008 
2009   PetscFunctionBegin;
2010   a = matA->A; b = matA->B;
2011   c = matB->A; d = matB->B;
2012 
2013   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2014   if (flg) {
2015     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2016   }
2017   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRMPI(ierr);
2018   PetscFunctionReturn(0);
2019 }
2020 
2021 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2022 {
2023   PetscErrorCode ierr;
2024   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2025   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2026 
2027   PetscFunctionBegin;
2028   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2029   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2030     /* because of the column compression in the off-processor part of the matrix a->B,
2031        the number of columns in a->B and b->B may be different, hence we cannot call
2032        the MatCopy() directly on the two parts. If need be, we can provide a more
2033        MatCopy() directly on the two parts. If need be, we could provide a copy more
2034        efficient than MatCopy_Basic() by first uncompressing the a->B matrices and
2035        then copying the submatrices */
2036   } else {
2037     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2038     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2039   }
2040   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2041   PetscFunctionReturn(0);
2042 }
2043 
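/*
   Illustrative sketch (not part of this source): the fast path above requires matching copy
   implementations and identical nonzero patterns, which holds, for example, when B was
   duplicated from A.

     Mat B;
     ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&B);CHKERRQ(ierr);
     ierr = MatCopy(A,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/
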
2044 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2045 {
2046   PetscErrorCode ierr;
2047 
2048   PetscFunctionBegin;
2049   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2050   PetscFunctionReturn(0);
2051 }
2052 
2053 /*
2054    Computes the number of nonzeros per row needed for preallocation when X and Y
2055    have different nonzero structure.
2056 */
2057 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2058 {
2059   PetscInt       i,j,k,nzx,nzy;
2060 
2061   PetscFunctionBegin;
2062   /* Set the number of nonzeros in the new matrix */
2063   for (i=0; i<m; i++) {
2064     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2065     nzx = xi[i+1] - xi[i];
2066     nzy = yi[i+1] - yi[i];
2067     nnz[i] = 0;
2068     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2069       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2070       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2071       nnz[i]++;
2072     }
2073     for (; k<nzy; k++) nnz[i]++;
2074   }
2075   PetscFunctionReturn(0);
2076 }
2077 
2078 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2079 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2080 {
2081   PetscErrorCode ierr;
2082   PetscInt       m = Y->rmap->N;
2083   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2084   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2085 
2086   PetscFunctionBegin;
2087   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2088   PetscFunctionReturn(0);
2089 }
2090 
2091 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2092 {
2093   PetscErrorCode ierr;
2094   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2095 
2096   PetscFunctionBegin;
2097   if (str == SAME_NONZERO_PATTERN) {
2098     ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr);
2099     ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr);
2100   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2101     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2102   } else {
2103     Mat      B;
2104     PetscInt *nnz_d,*nnz_o;
2105 
2106     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2107     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2108     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2109     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2110     ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr);
2111     ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr);
2112     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2113     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2114     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2115     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2116     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2117     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2118     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2119   }
2120   PetscFunctionReturn(0);
2121 }
2122 
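/*
   Illustrative sketch (not part of this source): Y <- a*X + Y with the pattern hints dispatched
   above; DIFFERENT_NONZERO_PATTERN takes the preallocate-and-rebuild branch, SAME_NONZERO_PATTERN
   and SUBSET_NONZERO_PATTERN the cheaper ones.

     ierr = MatAXPY(Y,alpha,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/
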
2123 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);
2124 
2125 PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
2126 {
2127 #if defined(PETSC_USE_COMPLEX)
2128   PetscErrorCode ierr;
2129   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2130 
2131   PetscFunctionBegin;
2132   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2133   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2134 #else
2135   PetscFunctionBegin;
2136 #endif
2137   PetscFunctionReturn(0);
2138 }
2139 
2140 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2141 {
2142   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2143   PetscErrorCode ierr;
2144 
2145   PetscFunctionBegin;
2146   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2147   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2152 {
2153   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2154   PetscErrorCode ierr;
2155 
2156   PetscFunctionBegin;
2157   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2158   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2159   PetscFunctionReturn(0);
2160 }
2161 
2162 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2163 {
2164   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2165   PetscErrorCode    ierr;
2166   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2167   PetscScalar       *va,*vv;
2168   Vec               vB,vA;
2169   const PetscScalar *vb;
2170 
2171   PetscFunctionBegin;
2172   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
2173   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
2174 
2175   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
2176   if (idx) {
2177     for (i=0; i<m; i++) {
2178       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2179     }
2180   }
2181 
2182   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
2183   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
2184   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
2185 
2186   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
2187   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
2188   for (i=0; i<m; i++) {
2189     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2190       vv[i] = vb[i];
2191       if (idx) idx[i] = a->garray[idxb[i]];
2192     } else {
2193       vv[i] = va[i];
2194       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2195         idx[i] = a->garray[idxb[i]];
2196     }
2197   }
2198   ierr = VecRestoreArrayWrite(v,&vv);CHKERRQ(ierr);
2199   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
2200   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
2201   ierr = PetscFree(idxb);CHKERRQ(ierr);
2202   ierr = VecDestroy(&vA);CHKERRQ(ierr);
2203   ierr = VecDestroy(&vB);CHKERRQ(ierr);
2204   PetscFunctionReturn(0);
2205 }
2206 
2207 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2208 {
2209   Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
2210   PetscInt          m = A->rmap->n,n = A->cmap->n;
2211   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2212   PetscInt          *cmap  = mat->garray;
2213   PetscInt          *diagIdx, *offdiagIdx;
2214   Vec               diagV, offdiagV;
2215   PetscScalar       *a, *diagA, *offdiagA;
2216   const PetscScalar *ba,*bav;
2217   PetscInt          r,j,col,ncols,*bi,*bj;
2218   PetscErrorCode    ierr;
2219   Mat               B = mat->B;
2220   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2221 
2222   PetscFunctionBegin;
2223   /* When a single process holds the entire A and the other processes have no entries */
2224   if (A->cmap->N == n) {
2225     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2226     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2227     ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
2228     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2229     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2230     PetscFunctionReturn(0);
2231   } else if (n == 0) {
2232     if (m) {
2233       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2234       for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
2235       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2236     }
2237     PetscFunctionReturn(0);
2238   }
2239 
2240   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2241   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2242   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2243   ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2244 
2245   /* Get offdiagIdx[] for implicit 0.0 */
2246   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2247   ba   = bav;
2248   bi   = b->i;
2249   bj   = b->j;
2250   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2251   for (r = 0; r < m; r++) {
2252     ncols = bi[r+1] - bi[r];
2253     if (ncols == A->cmap->N - n) { /* Brow is dense */
2254       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2255     } else { /* Brow is sparse, so we already KNOW the minimum in absolute value is 0.0 (an implicit zero) */
2256       offdiagA[r] = 0.0;
2257 
2258       /* Find first hole in the cmap */
2259       for (j=0; j<ncols; j++) {
2260         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2261         if (col > j && j < cstart) {
2262           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2263           break;
2264         } else if (col > j + n && j >= cstart) {
2265           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2266           break;
2267         }
2268       }
2269       if (j == ncols && ncols < A->cmap->N - n) {
2270         /* a hole is outside compressed Bcols */
2271         if (ncols == 0) {
2272           if (cstart) {
2273             offdiagIdx[r] = 0;
2274           } else offdiagIdx[r] = cend;
2275         } else { /* ncols > 0 */
2276           offdiagIdx[r] = cmap[ncols-1] + 1;
2277           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2278         }
2279       }
2280     }
2281 
2282     for (j=0; j<ncols; j++) {
2283       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2284       ba++; bj++;
2285     }
2286   }
2287 
2288   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2289   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2290   for (r = 0; r < m; ++r) {
2291     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2292       a[r]   = diagA[r];
2293       if (idx) idx[r] = cstart + diagIdx[r];
2294     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2295       a[r] = diagA[r];
2296       if (idx) {
2297         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2298           idx[r] = cstart + diagIdx[r];
2299         } else idx[r] = offdiagIdx[r];
2300       }
2301     } else {
2302       a[r]   = offdiagA[r];
2303       if (idx) idx[r] = offdiagIdx[r];
2304     }
2305   }
2306   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2307   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2308   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2309   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2310   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2311   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2312   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2313   PetscFunctionReturn(0);
2314 }
2315 
2316 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2317 {
2318   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
2319   PetscInt          m = A->rmap->n,n = A->cmap->n;
2320   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2321   PetscInt          *cmap  = mat->garray;
2322   PetscInt          *diagIdx, *offdiagIdx;
2323   Vec               diagV, offdiagV;
2324   PetscScalar       *a, *diagA, *offdiagA;
2325   const PetscScalar *ba,*bav;
2326   PetscInt          r,j,col,ncols,*bi,*bj;
2327   PetscErrorCode    ierr;
2328   Mat               B = mat->B;
2329   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2330 
2331   PetscFunctionBegin;
2332   /* When a single process holds the entire A and the other processes have no entries */
2333   if (A->cmap->N == n) {
2334     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2335     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2336     ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
2337     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2338     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2339     PetscFunctionReturn(0);
2340   } else if (n == 0) {
2341     if (m) {
2342       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2343       for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
2344       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2345     }
2346     PetscFunctionReturn(0);
2347   }
2348 
2349   ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2350   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2351   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2352   ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2353 
2354   /* Get offdiagIdx[] for implicit 0.0 */
2355   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2356   ba   = bav;
2357   bi   = b->i;
2358   bj   = b->j;
2359   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2360   for (r = 0; r < m; r++) {
2361     ncols = bi[r+1] - bi[r];
2362     if (ncols == A->cmap->N - n) { /* Brow is dense */
2363       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2364     } else { /* Brow is sparse, so we already KNOW the minimum is 0.0 or lower (an implicit zero) */
2365       offdiagA[r] = 0.0;
2366 
2367       /* Find first hole in the cmap */
2368       for (j=0; j<ncols; j++) {
2369         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2370         if (col > j && j < cstart) {
2371           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2372           break;
2373         } else if (col > j + n && j >= cstart) {
2374           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2375           break;
2376         }
2377       }
2378       if (j == ncols && ncols < A->cmap->N - n) {
2379         /* a hole is outside compressed Bcols */
2380         if (ncols == 0) {
2381           if (cstart) {
2382             offdiagIdx[r] = 0;
2383           } else offdiagIdx[r] = cend;
2384         } else { /* ncols > 0 */
2385           offdiagIdx[r] = cmap[ncols-1] + 1;
2386           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2387         }
2388       }
2389     }
2390 
2391     for (j=0; j<ncols; j++) {
2392       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2393       ba++; bj++;
2394     }
2395   }
2396 
2397   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2398   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2399   for (r = 0; r < m; ++r) {
2400     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2401       a[r]   = diagA[r];
2402       if (idx) idx[r] = cstart + diagIdx[r];
2403     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2404       a[r] = diagA[r];
2405       if (idx) {
2406         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2407           idx[r] = cstart + diagIdx[r];
2408         } else idx[r] = offdiagIdx[r];
2409       }
2410     } else {
2411       a[r]   = offdiagA[r];
2412       if (idx) idx[r] = offdiagIdx[r];
2413     }
2414   }
2415   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2416   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2417   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2418   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2419   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2420   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2421   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2422   PetscFunctionReturn(0);
2423 }
2424 
2425 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2426 {
2427   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
2428   PetscInt          m = A->rmap->n,n = A->cmap->n;
2429   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2430   PetscInt          *cmap  = mat->garray;
2431   PetscInt          *diagIdx, *offdiagIdx;
2432   Vec               diagV, offdiagV;
2433   PetscScalar       *a, *diagA, *offdiagA;
2434   const PetscScalar *ba,*bav;
2435   PetscInt          r,j,col,ncols,*bi,*bj;
2436   PetscErrorCode    ierr;
2437   Mat               B = mat->B;
2438   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2439 
2440   PetscFunctionBegin;
2441   /* When a single process holds the entire A and the other processes have no entries */
2442   if (A->cmap->N == n) {
2443     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2444     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2445     ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
2446     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2447     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2448     PetscFunctionReturn(0);
2449   } else if (n == 0) {
2450     if (m) {
2451       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2452       for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
2453       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2454     }
2455     PetscFunctionReturn(0);
2456   }
2457 
2458   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2459   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2460   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2461   ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2462 
2463   /* Get offdiagIdx[] for implicit 0.0 */
2464   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2465   ba   = bav;
2466   bi   = b->i;
2467   bj   = b->j;
2468   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2469   for (r = 0; r < m; r++) {
2470     ncols = bi[r+1] - bi[r];
2471     if (ncols == A->cmap->N - n) { /* Brow is dense */
2472       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2473     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2474       offdiagA[r] = 0.0;
2475 
2476       /* Find first hole in the cmap */
2477       for (j=0; j<ncols; j++) {
2478         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2479         if (col > j && j < cstart) {
2480           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2481           break;
2482         } else if (col > j + n && j >= cstart) {
2483           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2484           break;
2485         }
2486       }
2487       if (j == ncols && ncols < A->cmap->N - n) {
2488         /* a hole is outside compressed Bcols */
2489         if (ncols == 0) {
2490           if (cstart) {
2491             offdiagIdx[r] = 0;
2492           } else offdiagIdx[r] = cend;
2493         } else { /* ncols > 0 */
2494           offdiagIdx[r] = cmap[ncols-1] + 1;
2495           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2496         }
2497       }
2498     }
2499 
2500     for (j=0; j<ncols; j++) {
2501       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2502       ba++; bj++;
2503     }
2504   }
2505 
2506   ierr = VecGetArrayWrite(v,    &a);CHKERRQ(ierr);
2507   ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr);
2508   for (r = 0; r < m; ++r) {
2509     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2510       a[r] = diagA[r];
2511       if (idx) idx[r] = cstart + diagIdx[r];
2512     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2513       a[r] = diagA[r];
2514       if (idx) {
2515         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2516           idx[r] = cstart + diagIdx[r];
2517         } else idx[r] = offdiagIdx[r];
2518       }
2519     } else {
2520       a[r] = offdiagA[r];
2521       if (idx) idx[r] = offdiagIdx[r];
2522     }
2523   }
2524   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2525   ierr = VecRestoreArrayWrite(v,       &a);CHKERRQ(ierr);
2526   ierr = VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA);CHKERRQ(ierr);
2527   ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr);
2528   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2529   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2530   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2531   PetscFunctionReturn(0);
2532 }
2533 
2534 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2535 {
2536   PetscErrorCode ierr;
2537   Mat            *dummy;
2538 
2539   PetscFunctionBegin;
2540   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2541   *newmat = *dummy;
2542   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2543   PetscFunctionReturn(0);
2544 }
2545 
2546 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2547 {
2548   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2549   PetscErrorCode ierr;
2550 
2551   PetscFunctionBegin;
2552   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2553   A->factorerrortype = a->A->factorerrortype;
2554   PetscFunctionReturn(0);
2555 }
2556 
2557 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2558 {
2559   PetscErrorCode ierr;
2560   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2561 
2562   PetscFunctionBegin;
2563   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2564   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2565   if (x->assembled) {
2566     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2567   } else {
2568     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2569   }
2570   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2571   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2572   PetscFunctionReturn(0);
2573 }
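
/*
   Illustrative usage sketch (not part of this file's build): filling a preallocated (or already
   assembled) MATMPIAIJ matrix with random entries through the public MatSetRandom() interface,
   which dispatches to MatSetRandom_MPIAIJ() above.  "A" is assumed to exist already.

     PetscRandom rctx;

     ierr = PetscRandomCreate(PETSC_COMM_WORLD,&rctx);CHKERRQ(ierr);
     ierr = PetscRandomSetFromOptions(rctx);CHKERRQ(ierr);
     ierr = MatSetRandom(A,rctx);CHKERRQ(ierr);
     ierr = PetscRandomDestroy(&rctx);CHKERRQ(ierr);
*/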
2574 
2575 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2576 {
2577   PetscFunctionBegin;
2578   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2579   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2580   PetscFunctionReturn(0);
2581 }
2582 
2583 /*@
2584    MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap
2585    MatMPIAIJSetUseScalableIncreaseOverlap - Specify whether the matrix uses a scalable algorithm to compute the overlap
2586    Collective on Mat
2587 
2588    Input Parameters:
2589 +    A - the matrix
2590 -    sc - PETSC_TRUE to use the scalable algorithm (the default is not to use the scalable algorithm)
2591 
2592    Level: advanced
2593 
2594 @*/
2595 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2596 {
2597   PetscErrorCode       ierr;
2598 
2599   PetscFunctionBegin;
2600   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2601   PetscFunctionReturn(0);
2602 }
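
/*
   Illustrative usage sketch (not part of this file's build): selecting the scalable overlap
   algorithm before growing an index set, for example when building overlapping subdomains by
   hand.  "A" and "is" are assumed to be an assembled MATMPIAIJ matrix and a parallel row index
   set created elsewhere; the same switch is available at run time through the
   -mat_increase_overlap_scalable option handled in MatSetFromOptions_MPIAIJ() below.

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatIncreaseOverlap(A,1,&is,2);CHKERRQ(ierr);
*/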
2603 
2604 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2605 {
2606   PetscErrorCode       ierr;
2607   PetscBool            sc = PETSC_FALSE,flg;
2608 
2609   PetscFunctionBegin;
2610   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2611   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2612   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2613   if (flg) {
2614     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2615   }
2616   ierr = PetscOptionsTail();CHKERRQ(ierr);
2617   PetscFunctionReturn(0);
2618 }
2619 
2620 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2621 {
2622   PetscErrorCode ierr;
2623   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2624   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2625 
2626   PetscFunctionBegin;
2627   if (!Y->preallocated) {
2628     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2629   } else if (!aij->nz) {
2630     PetscInt nonew = aij->nonew;
2631     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2632     aij->nonew = nonew;
2633   }
2634   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2635   PetscFunctionReturn(0);
2636 }
2637 
2638 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2639 {
2640   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2641   PetscErrorCode ierr;
2642 
2643   PetscFunctionBegin;
2644   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2645   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2646   if (d) {
2647     PetscInt rstart;
2648     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2649     *d += rstart;
2650 
2651   }
2652   PetscFunctionReturn(0);
2653 }
2654 
2655 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2656 {
2657   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2658   PetscErrorCode ierr;
2659 
2660   PetscFunctionBegin;
2661   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2662   PetscFunctionReturn(0);
2663 }
2664 
2665 /* -------------------------------------------------------------------*/
2666 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2667                                        MatGetRow_MPIAIJ,
2668                                        MatRestoreRow_MPIAIJ,
2669                                        MatMult_MPIAIJ,
2670                                 /* 4*/ MatMultAdd_MPIAIJ,
2671                                        MatMultTranspose_MPIAIJ,
2672                                        MatMultTransposeAdd_MPIAIJ,
2673                                        NULL,
2674                                        NULL,
2675                                        NULL,
2676                                 /*10*/ NULL,
2677                                        NULL,
2678                                        NULL,
2679                                        MatSOR_MPIAIJ,
2680                                        MatTranspose_MPIAIJ,
2681                                 /*15*/ MatGetInfo_MPIAIJ,
2682                                        MatEqual_MPIAIJ,
2683                                        MatGetDiagonal_MPIAIJ,
2684                                        MatDiagonalScale_MPIAIJ,
2685                                        MatNorm_MPIAIJ,
2686                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2687                                        MatAssemblyEnd_MPIAIJ,
2688                                        MatSetOption_MPIAIJ,
2689                                        MatZeroEntries_MPIAIJ,
2690                                 /*24*/ MatZeroRows_MPIAIJ,
2691                                        NULL,
2692                                        NULL,
2693                                        NULL,
2694                                        NULL,
2695                                 /*29*/ MatSetUp_MPIAIJ,
2696                                        NULL,
2697                                        NULL,
2698                                        MatGetDiagonalBlock_MPIAIJ,
2699                                        NULL,
2700                                 /*34*/ MatDuplicate_MPIAIJ,
2701                                        NULL,
2702                                        NULL,
2703                                        NULL,
2704                                        NULL,
2705                                 /*39*/ MatAXPY_MPIAIJ,
2706                                        MatCreateSubMatrices_MPIAIJ,
2707                                        MatIncreaseOverlap_MPIAIJ,
2708                                        MatGetValues_MPIAIJ,
2709                                        MatCopy_MPIAIJ,
2710                                 /*44*/ MatGetRowMax_MPIAIJ,
2711                                        MatScale_MPIAIJ,
2712                                        MatShift_MPIAIJ,
2713                                        MatDiagonalSet_MPIAIJ,
2714                                        MatZeroRowsColumns_MPIAIJ,
2715                                 /*49*/ MatSetRandom_MPIAIJ,
2716                                        NULL,
2717                                        NULL,
2718                                        NULL,
2719                                        NULL,
2720                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2721                                        NULL,
2722                                        MatSetUnfactored_MPIAIJ,
2723                                        MatPermute_MPIAIJ,
2724                                        NULL,
2725                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2726                                        MatDestroy_MPIAIJ,
2727                                        MatView_MPIAIJ,
2728                                        NULL,
2729                                        NULL,
2730                                 /*64*/ NULL,
2731                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2732                                        NULL,
2733                                        NULL,
2734                                        NULL,
2735                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2736                                        MatGetRowMinAbs_MPIAIJ,
2737                                        NULL,
2738                                        NULL,
2739                                        NULL,
2740                                        NULL,
2741                                 /*75*/ MatFDColoringApply_AIJ,
2742                                        MatSetFromOptions_MPIAIJ,
2743                                        NULL,
2744                                        NULL,
2745                                        MatFindZeroDiagonals_MPIAIJ,
2746                                 /*80*/ NULL,
2747                                        NULL,
2748                                        NULL,
2749                                 /*83*/ MatLoad_MPIAIJ,
2750                                        MatIsSymmetric_MPIAIJ,
2751                                        NULL,
2752                                        NULL,
2753                                        NULL,
2754                                        NULL,
2755                                 /*89*/ NULL,
2756                                        NULL,
2757                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2758                                        NULL,
2759                                        NULL,
2760                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2761                                        NULL,
2762                                        NULL,
2763                                        NULL,
2764                                        MatBindToCPU_MPIAIJ,
2765                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2766                                        NULL,
2767                                        NULL,
2768                                        MatConjugate_MPIAIJ,
2769                                        NULL,
2770                                 /*104*/MatSetValuesRow_MPIAIJ,
2771                                        MatRealPart_MPIAIJ,
2772                                        MatImaginaryPart_MPIAIJ,
2773                                        NULL,
2774                                        NULL,
2775                                 /*109*/NULL,
2776                                        NULL,
2777                                        MatGetRowMin_MPIAIJ,
2778                                        NULL,
2779                                        MatMissingDiagonal_MPIAIJ,
2780                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2781                                        NULL,
2782                                        MatGetGhosts_MPIAIJ,
2783                                        NULL,
2784                                        NULL,
2785                                 /*119*/MatMultDiagonalBlock_MPIAIJ,
2786                                        NULL,
2787                                        NULL,
2788                                        NULL,
2789                                        MatGetMultiProcBlock_MPIAIJ,
2790                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2791                                        MatGetColumnNorms_MPIAIJ,
2792                                        MatInvertBlockDiagonal_MPIAIJ,
2793                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2794                                        MatCreateSubMatricesMPI_MPIAIJ,
2795                                 /*129*/NULL,
2796                                        NULL,
2797                                        NULL,
2798                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2799                                        NULL,
2800                                 /*134*/NULL,
2801                                        NULL,
2802                                        NULL,
2803                                        NULL,
2804                                        NULL,
2805                                 /*139*/MatSetBlockSizes_MPIAIJ,
2806                                        NULL,
2807                                        NULL,
2808                                        MatFDColoringSetUp_MPIXAIJ,
2809                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2810                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2811                                 /*145*/NULL,
2812                                        NULL,
2813                                        NULL
2814 };
2815 
2816 /* ----------------------------------------------------------------------------------------*/
2817 
2818 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2819 {
2820   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2821   PetscErrorCode ierr;
2822 
2823   PetscFunctionBegin;
2824   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2825   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2826   PetscFunctionReturn(0);
2827 }
2828 
2829 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2830 {
2831   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2832   PetscErrorCode ierr;
2833 
2834   PetscFunctionBegin;
2835   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2836   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2837   PetscFunctionReturn(0);
2838 }
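
/*
   Illustrative usage sketch (not part of this file's build): MatStoreValues()/MatRetrieveValues()
   let an application stash the numerical values of an assembled matrix and restore them later, as
   long as the nonzero pattern is frozen first.  "A" is assumed to be an assembled MATMPIAIJ matrix
   created elsewhere.

     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(A);CHKERRQ(ierr);
     (modify the values of A here, e.g. with MatShift() or MatAXPY(), keeping the same pattern)
     ierr = MatRetrieveValues(A);CHKERRQ(ierr);
*/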
2839 
2840 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2841 {
2842   Mat_MPIAIJ     *b;
2843   PetscErrorCode ierr;
2844   PetscMPIInt    size;
2845 
2846   PetscFunctionBegin;
2847   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2848   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2849   b = (Mat_MPIAIJ*)B->data;
2850 
2851 #if defined(PETSC_USE_CTABLE)
2852   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2853 #else
2854   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2855 #endif
2856   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2857   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2858   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2859 
2860   /* Because B will have been resized, we simply destroy it and create a new one each time */
2861   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
2862   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2863   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2864   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2865   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2866   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2867   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2868 
2869   if (!B->preallocated) {
2870     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2871     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2872     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2873     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2874     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2875   }
2876 
2877   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2878   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2879   B->preallocated  = PETSC_TRUE;
2880   B->was_assembled = PETSC_FALSE;
2881   B->assembled     = PETSC_FALSE;
2882   PetscFunctionReturn(0);
2883 }
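
/*
   Illustrative usage sketch (not part of this file's build): how the d_nz/d_nnz and o_nz/o_nnz
   arguments consumed above are typically supplied.  The sizes below are arbitrary (5 local rows
   and columns per process) and the sketch assumes at least two MPI ranks, since the off-diagonal
   block has zero columns on a single process.  When the per-row arrays are given, the scalar
   d_nz/o_nz values are ignored.

     Mat      A;
     PetscInt d_nnz[5] = {1,3,3,3,3};
     PetscInt o_nnz[5] = {0,2,2,2,2};

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,5,5,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/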
2884 
2885 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2886 {
2887   Mat_MPIAIJ     *b;
2888   PetscErrorCode ierr;
2889 
2890   PetscFunctionBegin;
2891   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2892   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2893   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2894   b = (Mat_MPIAIJ*)B->data;
2895 
2896 #if defined(PETSC_USE_CTABLE)
2897   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2898 #else
2899   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2900 #endif
2901   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2902   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2903   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2904 
2905   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2906   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2907   B->preallocated  = PETSC_TRUE;
2908   B->was_assembled = PETSC_FALSE;
2909   B->assembled = PETSC_FALSE;
2910   PetscFunctionReturn(0);
2911 }
2912 
2913 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2914 {
2915   Mat            mat;
2916   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2917   PetscErrorCode ierr;
2918 
2919   PetscFunctionBegin;
2920   *newmat = NULL;
2921   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2922   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2923   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2924   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2925   a       = (Mat_MPIAIJ*)mat->data;
2926 
2927   mat->factortype   = matin->factortype;
2928   mat->assembled    = matin->assembled;
2929   mat->insertmode   = NOT_SET_VALUES;
2930   mat->preallocated = matin->preallocated;
2931 
2932   a->size         = oldmat->size;
2933   a->rank         = oldmat->rank;
2934   a->donotstash   = oldmat->donotstash;
2935   a->roworiented  = oldmat->roworiented;
2936   a->rowindices   = NULL;
2937   a->rowvalues    = NULL;
2938   a->getrowactive = PETSC_FALSE;
2939 
2940   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2941   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2942 
2943   if (oldmat->colmap) {
2944 #if defined(PETSC_USE_CTABLE)
2945     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2946 #else
2947     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2948     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2949     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2950 #endif
2951   } else a->colmap = NULL;
2952   if (oldmat->garray) {
2953     PetscInt len;
2954     len  = oldmat->B->cmap->n;
2955     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2956     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2957     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2958   } else a->garray = NULL;
2959 
2960   /* It may happen that MatDuplicate() is called with a non-assembled matrix;
2961      MatDuplicate() only requires the matrix to be preallocated.
2962      This can happen, for example, inside DMCreateMatrix_Shell() */
2963   if (oldmat->lvec) {
2964     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2965     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2966   }
2967   if (oldmat->Mvctx) {
2968     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2969     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2970   }
2971   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2972   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2973   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2974   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2975   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2976   *newmat = mat;
2977   PetscFunctionReturn(0);
2978 }
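
/*
   Illustrative usage sketch (not part of this file's build): duplicating an MPIAIJ matrix either
   with or without its numerical values; as noted above, the matrix only needs to be preallocated,
   not necessarily assembled.  "A" is assumed to have been created elsewhere.

     Mat Acopy,Apattern;

     ierr = MatDuplicate(A,MAT_COPY_VALUES,&Acopy);CHKERRQ(ierr);
     ierr = MatDuplicate(A,MAT_DO_NOT_COPY_VALUES,&Apattern);CHKERRQ(ierr);
     ierr = MatDestroy(&Apattern);CHKERRQ(ierr);
     ierr = MatDestroy(&Acopy);CHKERRQ(ierr);
*/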
2979 
2980 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2981 {
2982   PetscBool      isbinary, ishdf5;
2983   PetscErrorCode ierr;
2984 
2985   PetscFunctionBegin;
2986   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2987   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2988   /* force binary viewer to load .info file if it has not yet done so */
2989   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2990   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2991   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2992   if (isbinary) {
2993     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2994   } else if (ishdf5) {
2995 #if defined(PETSC_HAVE_HDF5)
2996     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2997 #else
2998     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2999 #endif
3000   } else {
3001     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3002   }
3003   PetscFunctionReturn(0);
3004 }
3005 
3006 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3007 {
3008   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
3009   PetscInt       *rowidxs,*colidxs;
3010   PetscScalar    *matvals;
3011   PetscErrorCode ierr;
3012 
3013   PetscFunctionBegin;
3014   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3015 
3016   /* read in matrix header */
3017   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3018   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3019   M  = header[1]; N = header[2]; nz = header[3];
3020   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3021   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3022   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3023 
3024   /* set block sizes from the viewer's .info file */
3025   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3026   /* set global sizes if not set already */
3027   if (mat->rmap->N < 0) mat->rmap->N = M;
3028   if (mat->cmap->N < 0) mat->cmap->N = N;
3029   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3030   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3031 
3032   /* check if the matrix sizes are correct */
3033   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3034   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3035 
3036   /* read in row lengths and build row indices */
3037   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
3038   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3039   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3040   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3041   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRMPI(ierr);
3042   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3043   /* read in column indices and matrix values */
3044   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3045   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3046   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3047   /* store matrix indices and values */
3048   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
3049   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3050   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3051   PetscFunctionReturn(0);
3052 }
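
/*
   Illustrative usage sketch (not part of this file's build): loading an MPIAIJ matrix from a PETSc
   binary file, which ends up in MatLoad_MPIAIJ()/MatLoad_MPIAIJ_Binary() above.  The file name
   "matrix.dat" is a placeholder.

     Mat         A;
     PetscViewer viewer;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/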
3053 
3054 /* Not scalable because of ISAllGather() unless getting all columns. */
3055 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3056 {
3057   PetscErrorCode ierr;
3058   IS             iscol_local;
3059   PetscBool      isstride;
3060   PetscMPIInt    lisstride=0,gisstride;
3061 
3062   PetscFunctionBegin;
3063   /* check if we are grabbing all columns */
3064   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3065 
3066   if (isstride) {
3067     PetscInt  start,len,mstart,mlen;
3068     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3069     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3070     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3071     if (mstart == start && mlen-mstart == len) lisstride = 1;
3072   }
3073 
3074   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
3075   if (gisstride) {
3076     PetscInt N;
3077     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3078     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3079     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3080     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3081   } else {
3082     PetscInt cbs;
3083     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3084     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3085     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3086   }
3087 
3088   *isseq = iscol_local;
3089   PetscFunctionReturn(0);
3090 }
3091 
3092 /*
3093  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and an iscol_local of global size
3094  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3095 
3096  Input Parameters:
3097    mat - matrix
3098    isrow - parallel row index set; its local indices are a subset of local rows of mat,
3099            i.e., mat->rstart <= isrow[i] < mat->rend
3100    iscol - parallel column index set; its local indices are a subset of local columns of mat,
3101            i.e., mat->cstart <= iscol[i] < mat->cend
3102  Output Parameter:
3103    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3104    iscol_o - sequential column index set for retrieving mat->B
3105    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3106  */
3107 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3108 {
3109   PetscErrorCode ierr;
3110   Vec            x,cmap;
3111   const PetscInt *is_idx;
3112   PetscScalar    *xarray,*cmaparray;
3113   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3114   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3115   Mat            B=a->B;
3116   Vec            lvec=a->lvec,lcmap;
3117   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3118   MPI_Comm       comm;
3119   VecScatter     Mvctx=a->Mvctx;
3120 
3121   PetscFunctionBegin;
3122   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3123   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3124 
3125   /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
3126   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3127   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3128   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3129   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3130 
3131   /* Get start indices */
3132   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3133   isstart -= ncols;
3134   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3135 
3136   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3137   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3138   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3139   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3140   for (i=0; i<ncols; i++) {
3141     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3142     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3143     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3144   }
3145   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3146   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3147   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3148 
3149   /* Get iscol_d */
3150   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3151   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3152   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3153 
3154   /* Get isrow_d */
3155   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3156   rstart = mat->rmap->rstart;
3157   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3158   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3159   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3160   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3161 
3162   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3163   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3164   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3165 
3166   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3167   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3168   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3169 
3170   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3171 
3172   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3173   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3174 
3175   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3176   /* off-process column indices */
3177   count = 0;
3178   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3179   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3180 
3181   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3182   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3183   for (i=0; i<Bn; i++) {
3184     if (PetscRealPart(xarray[i]) > -1.0) {
3185       idx[count]     = i;                   /* local column index in off-diagonal part B */
3186       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3187       count++;
3188     }
3189   }
3190   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3191   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3192 
3193   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3194   /* cannot ensure iscol_o has the same block size as iscol! */
3195 
3196   ierr = PetscFree(idx);CHKERRQ(ierr);
3197   *garray = cmap1;
3198 
3199   ierr = VecDestroy(&x);CHKERRQ(ierr);
3200   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3201   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3202   PetscFunctionReturn(0);
3203 }
3204 
3205 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3206 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3207 {
3208   PetscErrorCode ierr;
3209   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3210   Mat            M = NULL;
3211   MPI_Comm       comm;
3212   IS             iscol_d,isrow_d,iscol_o;
3213   Mat            Asub = NULL,Bsub = NULL;
3214   PetscInt       n;
3215 
3216   PetscFunctionBegin;
3217   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3218 
3219   if (call == MAT_REUSE_MATRIX) {
3220     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3221     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3222     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3223 
3224     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3225     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3226 
3227     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3228     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3229 
3230     /* Update diagonal and off-diagonal portions of submat */
3231     asub = (Mat_MPIAIJ*)(*submat)->data;
3232     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3233     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3234     if (n) {
3235       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3236     }
3237     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3238     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3239 
3240   } else { /* call == MAT_INITIAL_MATRIX */
3241     const PetscInt *garray;
3242     PetscInt        BsubN;
3243 
3244     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3245     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3246 
3247     /* Create local submatrices Asub and Bsub */
3248     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3249     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3250 
3251     /* Create submatrix M */
3252     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3253 
3254     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3255     asub = (Mat_MPIAIJ*)M->data;
3256 
3257     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3258     n = asub->B->cmap->N;
3259     if (BsubN > n) {
3260       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3261       const PetscInt *idx;
3262       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3263       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3264 
3265       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3266       j = 0;
3267       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3268       for (i=0; i<n; i++) {
3269         if (j >= BsubN) break;
3270         while (subgarray[i] > garray[j]) j++;
3271 
3272         if (subgarray[i] == garray[j]) {
3273           idx_new[i] = idx[j++];
3274         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be less than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3275       }
3276       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3277 
3278       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3279       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3280 
3281     } else if (BsubN < n) {
3282       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Columns of Bsub (%D) cannot be smaller than B's (%D)",BsubN,asub->B->cmap->N);
3283     }
3284 
3285     ierr = PetscFree(garray);CHKERRQ(ierr);
3286     *submat = M;
3287 
3288     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3289     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3290     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3291 
3292     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3293     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3294 
3295     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3296     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3297   }
3298   PetscFunctionReturn(0);
3299 }
3300 
3301 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3302 {
3303   PetscErrorCode ierr;
3304   IS             iscol_local=NULL,isrow_d;
3305   PetscInt       csize;
3306   PetscInt       n,i,j,start,end;
3307   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3308   MPI_Comm       comm;
3309 
3310   PetscFunctionBegin;
3311   /* If isrow has same processor distribution as mat,
3312      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3313   if (call == MAT_REUSE_MATRIX) {
3314     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3315     if (isrow_d) {
3316       sameRowDist  = PETSC_TRUE;
3317       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3318     } else {
3319       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3320       if (iscol_local) {
3321         sameRowDist  = PETSC_TRUE;
3322         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3323       }
3324     }
3325   } else {
3326     /* Check if isrow has same processor distribution as mat */
3327     sameDist[0] = PETSC_FALSE;
3328     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3329     if (!n) {
3330       sameDist[0] = PETSC_TRUE;
3331     } else {
3332       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3333       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3334       if (i >= start && j < end) {
3335         sameDist[0] = PETSC_TRUE;
3336       }
3337     }
3338 
3339     /* Check if iscol has same processor distribution as mat */
3340     sameDist[1] = PETSC_FALSE;
3341     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3342     if (!n) {
3343       sameDist[1] = PETSC_TRUE;
3344     } else {
3345       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3346       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3347       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3348     }
3349 
3350     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3351     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRMPI(ierr);
3352     sameRowDist = tsameDist[0];
3353   }
3354 
3355   if (sameRowDist) {
3356     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3357       /* isrow and iscol have same processor distribution as mat */
3358       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3359       PetscFunctionReturn(0);
3360     } else { /* sameRowDist */
3361       /* isrow has same processor distribution as mat */
3362       if (call == MAT_INITIAL_MATRIX) {
3363         PetscBool sorted;
3364         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3365         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3366         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3367         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3368 
3369         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3370         if (sorted) {
3371           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local to be sorted; it can have duplicate indices */
3372           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3373           PetscFunctionReturn(0);
3374         }
3375       } else { /* call == MAT_REUSE_MATRIX */
3376         IS iscol_sub;
3377         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3378         if (iscol_sub) {
3379           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3380           PetscFunctionReturn(0);
3381         }
3382       }
3383     }
3384   }
3385 
3386   /* General case: iscol -> iscol_local which has global size of iscol */
3387   if (call == MAT_REUSE_MATRIX) {
3388     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3389     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3390   } else {
3391     if (!iscol_local) {
3392       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3393     }
3394   }
3395 
3396   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3397   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3398 
3399   if (call == MAT_INITIAL_MATRIX) {
3400     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3401     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3402   }
3403   PetscFunctionReturn(0);
3404 }
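
/*
   Illustrative usage sketch (not part of this file's build): extracting a parallel submatrix via
   the public MatCreateSubMatrix() interface, which dispatches to MatCreateSubMatrix_MPIAIJ()
   above.  "A" is assumed to be an assembled MATMPIAIJ matrix; each process keeps the first half of
   its local rows and all of its local columns, so the same-row/column-distribution fast paths can
   be taken.

     Mat      Asub;
     IS       isrow,iscol;
     PetscInt rstart,rend,cstart,cend;

     ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
     ierr = MatGetOwnershipRangeColumn(A,&cstart,&cend);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,(rend-rstart)/2,rstart,1,&isrow);CHKERRQ(ierr);
     ierr = ISCreateStride(PETSC_COMM_WORLD,cend-cstart,cstart,1,&iscol);CHKERRQ(ierr);
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
     ierr = MatDestroy(&Asub);CHKERRQ(ierr);
*/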
3405 
3406 /*@C
3407      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3408          and "off-diagonal" parts of the matrix in CSR format.
3409 
3410    Collective
3411 
3412    Input Parameters:
3413 +  comm - MPI communicator
3414 .  A - "diagonal" portion of matrix
3415 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3416 -  garray - global index of B columns
3417 
3418    Output Parameter:
3419 .   mat - the matrix, with input A as its local diagonal matrix
3420    Level: advanced
3421 
3422    Notes:
3423        See MatCreateAIJ() for the definition of the "diagonal" and "off-diagonal" portions of the matrix.
3424        A becomes part of the output mat and B is destroyed by this routine; the user may not use A or B afterwards.
3425 
3426 .seealso: MatCreateMPIAIJWithSplitArrays()
3427 @*/
3428 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3429 {
3430   PetscErrorCode    ierr;
3431   Mat_MPIAIJ        *maij;
3432   Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
3433   PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
3434   const PetscScalar *oa;
3435   Mat               Bnew;
3436   PetscInt          m,n,N;
3437 
3438   PetscFunctionBegin;
3439   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3440   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3441   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3442   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3443   /* the check below was removed; when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3444   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3445 
3446   /* Get global columns of mat */
3447   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3448 
3449   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3450   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3451   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3452   maij = (Mat_MPIAIJ*)(*mat)->data;
3453 
3454   (*mat)->preallocated = PETSC_TRUE;
3455 
3456   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3457   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3458 
3459   /* Set A as diagonal portion of *mat */
3460   maij->A = A;
3461 
3462   nz = oi[m];
3463   for (i=0; i<nz; i++) {
3464     col   = oj[i];
3465     oj[i] = garray[col];
3466   }
3467 
3468   /* Set Bnew as off-diagonal portion of *mat */
3469   ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr);
3470   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr);
3471   ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr);
3472   bnew        = (Mat_SeqAIJ*)Bnew->data;
3473   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3474   maij->B     = Bnew;
3475 
3476   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3477 
3478   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3479   b->free_a       = PETSC_FALSE;
3480   b->free_ij      = PETSC_FALSE;
3481   ierr = MatDestroy(&B);CHKERRQ(ierr);
3482 
3483   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3484   bnew->free_a       = PETSC_TRUE;
3485   bnew->free_ij      = PETSC_TRUE;
3486 
3487   /* condense columns of maij->B */
3488   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3489   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3490   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3491   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3492   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3493   PetscFunctionReturn(0);
3494 }
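
/*
   Illustrative usage sketch (not part of this file's build): assembling an MPIAIJ matrix from a
   sequential diagonal block, a sequential off-diagonal block (possibly with empty columns) and the
   global column indices of that off-diagonal block, as MatCreateSubMatrix_MPIAIJ_SameRowColDist()
   does above.  "Aloc", "Bloc" and "garray" are assumed to have been built elsewhere; both
   sequential matrices are consumed by the call, while garray remains owned by the caller.

     Mat C;

     ierr = MatCreateMPIAIJWithSeqAIJ(PETSC_COMM_WORLD,Aloc,Bloc,garray,&C);CHKERRQ(ierr);
     ierr = MatDestroy(&C);CHKERRQ(ierr);
*/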
3495 
3496 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3497 
3498 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3499 {
3500   PetscErrorCode ierr;
3501   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3502   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3503   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3504   Mat            M,Msub,B=a->B;
3505   MatScalar      *aa;
3506   Mat_SeqAIJ     *aij;
3507   PetscInt       *garray = a->garray,*colsub,Ncols;
3508   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3509   IS             iscol_sub,iscmap;
3510   const PetscInt *is_idx,*cmap;
3511   PetscBool      allcolumns=PETSC_FALSE;
3512   MPI_Comm       comm;
3513 
3514   PetscFunctionBegin;
3515   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3516   if (call == MAT_REUSE_MATRIX) {
3517     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3518     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3519     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3520 
3521     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3522     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3523 
3524     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3525     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3526 
3527     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3528 
3529   } else { /* call == MAT_INITIAL_MATRIX */
3530     PetscBool flg;
3531 
3532     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3533     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3534 
3535     /* (1) iscol -> nonscalable iscol_local */
3536     /* Check for special case: each processor gets entire matrix columns */
3537     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3538     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3539     ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
3540     if (allcolumns) {
3541       iscol_sub = iscol_local;
3542       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3543       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3544 
3545     } else {
3546       /* (2) iscol_local -> iscol_sub and iscmap. The implementation below requires iscol_local to be sorted; it can have duplicate indices */
3547       PetscInt *idx,*cmap1,k;
3548       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3549       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3550       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3551       count = 0;
3552       k     = 0;
3553       for (i=0; i<Ncols; i++) {
3554         j = is_idx[i];
3555         if (j >= cstart && j < cend) {
3556           /* diagonal part of mat */
3557           idx[count]     = j;
3558           cmap1[count++] = i; /* column index in submat */
3559         } else if (Bn) {
3560           /* off-diagonal part of mat */
3561           if (j == garray[k]) {
3562             idx[count]     = j;
3563             cmap1[count++] = i;  /* column index in submat */
3564           } else if (j > garray[k]) {
3565             while (j > garray[k] && k < Bn-1) k++;
3566             if (j == garray[k]) {
3567               idx[count]     = j;
3568               cmap1[count++] = i; /* column index in submat */
3569             }
3570           }
3571         }
3572       }
3573       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3574 
3575       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3576       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3577       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3578 
3579       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3580     }
3581 
3582     /* (3) Create sequential Msub */
3583     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3584   }
3585 
3586   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3587   aij  = (Mat_SeqAIJ*)(Msub)->data;
3588   ii   = aij->i;
3589   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3590 
3591   /*
3592       m - number of local rows
3593       Ncols - number of columns (same on all processors)
3594       rstart - first row in new global matrix generated
3595   */
3596   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3597 
3598   if (call == MAT_INITIAL_MATRIX) {
3599     /* (4) Create parallel newmat */
3600     PetscMPIInt    rank,size;
3601     PetscInt       csize;
3602 
3603     ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3604     ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3605 
3606     /*
3607         Determine the number of non-zeros in the diagonal and off-diagonal
3608         portions of the matrix in order to do correct preallocation
3609     */
3610 
3611     /* first get start and end of "diagonal" columns */
3612     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3613     if (csize == PETSC_DECIDE) {
3614       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3615       if (mglobal == Ncols) { /* square matrix */
3616         nlocal = m;
3617       } else {
3618         nlocal = Ncols/size + ((Ncols % size) > rank);
3619       }
3620     } else {
3621       nlocal = csize;
3622     }
3623     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3624     rstart = rend - nlocal;
3625     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3626 
3627     /* next, compute all the lengths */
3628     jj    = aij->j;
3629     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3630     olens = dlens + m;
3631     for (i=0; i<m; i++) {
3632       jend = ii[i+1] - ii[i];
3633       olen = 0;
3634       dlen = 0;
3635       for (j=0; j<jend; j++) {
3636         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3637         else dlen++;
3638         jj++;
3639       }
3640       olens[i] = olen;
3641       dlens[i] = dlen;
3642     }
3643 
3644     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3645     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3646 
3647     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3648     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3649     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3650     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3651     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3652     ierr = PetscFree(dlens);CHKERRQ(ierr);
3653 
3654   } else { /* call == MAT_REUSE_MATRIX */
3655     M    = *newmat;
3656     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3657     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3658     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3659     /*
3660          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3661        rather than the slower MatSetValues().
3662     */
3663     M->was_assembled = PETSC_TRUE;
3664     M->assembled     = PETSC_FALSE;
3665   }
3666 
3667   /* (5) Set values of Msub to *newmat */
3668   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3669   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3670 
3671   jj   = aij->j;
3672   ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
3673   for (i=0; i<m; i++) {
3674     row = rstart + i;
3675     nz  = ii[i+1] - ii[i];
3676     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3677     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3678     jj += nz; aa += nz;
3679   }
3680   ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
3681   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3682 
3683   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3684   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3685 
3686   ierr = PetscFree(colsub);CHKERRQ(ierr);
3687 
3688   /* save Msub, iscol_sub and iscmap used in processor for next request */
3689   if (call == MAT_INITIAL_MATRIX) {
3690     *newmat = M;
3691     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3692     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3693 
3694     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3695     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3696 
3697     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3698     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3699 
3700     if (iscol_local) {
3701       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3702       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3703     }
3704   }
3705   PetscFunctionReturn(0);
3706 }
3707 
3708 /*
3709     Not great since it makes two copies of the submatrix: first a SeqAIJ
3710   on each process, and then the end result by concatenating the local matrices.
3711   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()
3712 
3713   Note: This requires a sequential iscol with all indices.
3714 */
3715 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3716 {
3717   PetscErrorCode ierr;
3718   PetscMPIInt    rank,size;
3719   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3720   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3721   Mat            M,Mreuse;
3722   MatScalar      *aa,*vwork;
3723   MPI_Comm       comm;
3724   Mat_SeqAIJ     *aij;
3725   PetscBool      colflag,allcolumns=PETSC_FALSE;
3726 
3727   PetscFunctionBegin;
3728   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3729   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3730   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3731 
3732   /* Check for special case: each processor gets entire matrix columns */
3733   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3734   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3735   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3736   ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
3737 
3738   if (call ==  MAT_REUSE_MATRIX) {
3739     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3740     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3741     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3742   } else {
3743     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3744   }
3745 
3746   /*
3747       m - number of local rows
3748       n - number of columns (same on all processors)
3749       rstart - first row in new global matrix generated
3750   */
3751   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3752   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3753   if (call == MAT_INITIAL_MATRIX) {
3754     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3755     ii  = aij->i;
3756     jj  = aij->j;
3757 
3758     /*
3759         Determine the number of non-zeros in the diagonal and off-diagonal
3760         portions of the matrix in order to do correct preallocation
3761     */
3762 
3763     /* first get start and end of "diagonal" columns */
3764     if (csize == PETSC_DECIDE) {
3765       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3766       if (mglobal == n) { /* square matrix */
3767         nlocal = m;
3768       } else {
3769         nlocal = n/size + ((n % size) > rank);
3770       }
3771     } else {
3772       nlocal = csize;
3773     }
3774     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3775     rstart = rend - nlocal;
3776     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3777 
3778     /* next, compute all the lengths */
3779     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3780     olens = dlens + m;
3781     for (i=0; i<m; i++) {
3782       jend = ii[i+1] - ii[i];
3783       olen = 0;
3784       dlen = 0;
3785       for (j=0; j<jend; j++) {
3786         if (*jj < rstart || *jj >= rend) olen++;
3787         else dlen++;
3788         jj++;
3789       }
3790       olens[i] = olen;
3791       dlens[i] = dlen;
3792     }
3793     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3794     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3795     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3796     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3797     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3798     ierr = PetscFree(dlens);CHKERRQ(ierr);
3799   } else {
3800     PetscInt ml,nl;
3801 
3802     M    = *newmat;
3803     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3804     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3805     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3806     /*
3807          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3808        rather than the slower MatSetValues().
3809     */
3810     M->was_assembled = PETSC_TRUE;
3811     M->assembled     = PETSC_FALSE;
3812   }
3813   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3814   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3815   ii   = aij->i;
3816   jj   = aij->j;
3817 
3818   /* trigger copy to CPU if needed */
3819   ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
3820   for (i=0; i<m; i++) {
3821     row   = rstart + i;
3822     nz    = ii[i+1] - ii[i];
3823     cwork = jj; jj += nz;
3824     vwork = aa; aa += nz;
3825     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3826   }
3827   ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
3828 
3829   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3830   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3831   *newmat = M;
3832 
3833   /* save submatrix used in processor for next request */
3834   if (call ==  MAT_INITIAL_MATRIX) {
3835     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3836     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3837   }
3838   PetscFunctionReturn(0);
3839 }
3840 
3841 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3842 {
3843   PetscInt       m,cstart, cend,j,nnz,i,d;
3844   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3845   const PetscInt *JJ;
3846   PetscErrorCode ierr;
3847   PetscBool      nooffprocentries;
3848 
3849   PetscFunctionBegin;
3850   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3851 
3852   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3853   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3854   m      = B->rmap->n;
3855   cstart = B->cmap->rstart;
3856   cend   = B->cmap->rend;
3857   rstart = B->rmap->rstart;
3858 
3859   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3860 
3861   if (PetscDefined(USE_DEBUG)) {
3862     for (i=0; i<m; i++) {
3863       nnz = Ii[i+1]- Ii[i];
3864       JJ  = J + Ii[i];
3865       if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3866       if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3867       if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3868     }
3869   }
3870 
3871   for (i=0; i<m; i++) {
3872     nnz     = Ii[i+1]- Ii[i];
3873     JJ      = J + Ii[i];
3874     nnz_max = PetscMax(nnz_max,nnz);
3875     d       = 0;
3876     for (j=0; j<nnz; j++) {
3877       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3878     }
3879     d_nnz[i] = d;
3880     o_nnz[i] = nnz - d;
3881   }
3882   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3883   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3884 
3885   for (i=0; i<m; i++) {
3886     ii   = i + rstart;
3887     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3888   }
3889   nooffprocentries    = B->nooffprocentries;
3890   B->nooffprocentries = PETSC_TRUE;
3891   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3892   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3893   B->nooffprocentries = nooffprocentries;
3894 
3895   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3896   PetscFunctionReturn(0);
3897 }
3898 
3899 /*@
3900    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3901    (the default parallel PETSc format).
3902 
3903    Collective
3904 
3905    Input Parameters:
3906 +  B - the matrix
3907 .  i - the indices into j for the start of each local row (starts with zero)
3908 .  j - the column indices for each local row (starts with zero)
3909 -  v - optional values in the matrix
3910 
3911    Level: developer
3912 
3913    Notes:
3914        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3915      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3916      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3917 
3918        The i and j indices are 0 based, and the i indices are offsets into the local j (and v) arrays.
3919
3920        The format used for the sparse matrix input is equivalent to a
3921     row-major ordering, i.e., for the following matrix, the expected input data is
3922     as shown below:
3923 
3924 $        1 0 0
3925 $        2 0 3     P0
3926 $       -------
3927 $        4 5 6     P1
3928 $
3929 $     Process0 [P0]: rows_owned=[0,1]
3930 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3931 $        j =  {0,0,2}  [size = 3]
3932 $        v =  {1,2,3}  [size = 3]
3933 $
3934 $     Process1 [P1]: rows_owned=[2]
3935 $        i =  {0,3}    [size = nrow+1  = 1+1]
3936 $        j =  {0,1,2}  [size = 3]
3937 $        v =  {4,5,6}  [size = 3]
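       As a sketch only (B, comm, and the error-handling style below are assumed to be set up elsewhere),
     process 0 of the layout above could pass its rows as

.vb
       PetscInt    i[] = {0,1,3};          /* row offsets for the 2 locally owned rows */
       PetscInt    j[] = {0,0,2};          /* global column indices */
       PetscScalar v[] = {1.0,2.0,3.0};    /* values, in the same order as j */
       ierr = MatCreate(comm,&B);CHKERRQ(ierr);
       ierr = MatSetSizes(B,2,PETSC_DECIDE,3,3);CHKERRQ(ierr);
       ierr = MatSetType(B,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocationCSR(B,i,j,v);CHKERRQ(ierr);
.ve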
3938 
3939 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3940           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3941 @*/
3942 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3943 {
3944   PetscErrorCode ierr;
3945 
3946   PetscFunctionBegin;
3947   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3948   PetscFunctionReturn(0);
3949 }
3950 
3951 /*@C
3952    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3953    (the default parallel PETSc format).  For good matrix assembly performance
3954    the user should preallocate the matrix storage by setting the parameters
3955    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3956    performance can be increased by more than a factor of 50.
3957 
3958    Collective
3959 
3960    Input Parameters:
3961 +  B - the matrix
3962 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3963            (same value is used for all local rows)
3964 .  d_nnz - array containing the number of nonzeros in the various rows of the
3965            DIAGONAL portion of the local submatrix (possibly different for each row)
3966            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3967            The size of this array is equal to the number of local rows, i.e 'm'.
3968            For matrices that will be factored, you must leave room for (and set)
3969            the diagonal entry even if it is zero.
3970 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3971            submatrix (same value is used for all local rows).
3972 -  o_nnz - array containing the number of nonzeros in the various rows of the
3973            OFF-DIAGONAL portion of the local submatrix (possibly different for
3974            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3975            structure. The size of this array is equal to the number
3976            of local rows, i.e 'm'.
3977 
3978    If the *_nnz parameter is given then the *_nz parameter is ignored
3979 
3980    The AIJ format (also called the Yale sparse matrix format or
3981    compressed row storage (CSR)), is fully compatible with standard Fortran 77
3982    storage.  The stored row and column indices begin with zero.
3983    See Users-Manual: ch_mat for details.
3984 
3985    The parallel matrix is partitioned such that the first m0 rows belong to
3986    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3987    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
3988 
3989    The DIAGONAL portion of the local submatrix of a processor can be defined
3990    as the submatrix obtained by extracting the part corresponding to
3991    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3992    first row that belongs to the processor, r2 is the last row belonging to
3993    this processor, and c1-c2 is the range of indices of the local part of a
3994    vector suitable for applying the matrix to.  This is an m x n matrix.  In the
3995    common case of a square matrix, the row and column ranges are the same and
3996    the DIAGONAL part is also square. The remaining portion of the local
3997    submatrix (m x (N-n)) constitutes the OFF-DIAGONAL portion.
3998 
3999    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4000 
4001    You can call MatGetInfo() to get information on how effective the preallocation was;
4002    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
4003    You can also run with the option -info and look for messages with the string
4004    malloc in them to see if additional memory allocation was needed.
4005 
4006    Example usage:
4007 
4008    Consider the following 8x8 matrix with 34 non-zero values, that is
4009    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4010    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4011    as follows:
4012 
4013 .vb
4014             1  2  0  |  0  3  0  |  0  4
4015     Proc0   0  5  6  |  7  0  0  |  8  0
4016             9  0 10  | 11  0  0  | 12  0
4017     -------------------------------------
4018            13  0 14  | 15 16 17  |  0  0
4019     Proc1   0 18  0  | 19 20 21  |  0  0
4020             0  0  0  | 22 23  0  | 24  0
4021     -------------------------------------
4022     Proc2  25 26 27  |  0  0 28  | 29  0
4023            30  0  0  | 31 32 33  |  0 34
4024 .ve
4025 
4026    This can be represented as a collection of submatrices as:
4027 
4028 .vb
4029       A B C
4030       D E F
4031       G H I
4032 .ve
4033 
4034    Where the submatrices A,B,C are owned by proc0, D,E,F are
4035    owned by proc1, G,H,I are owned by proc2.
4036 
4037    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4038    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4039    The 'M','N' parameters are 8,8, and have the same values on all procs.
4040 
4041    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4042    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4043    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4044    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4045    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4046    matrix, and [DF] as another SeqAIJ matrix.
4047 
4048    When d_nz, o_nz parameters are specified, d_nz storage elements are
4049    allocated for every row of the local diagonal submatrix, and o_nz
4050    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4051    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4052    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4053    In this case, the values of d_nz,o_nz are:
4054 .vb
4055      proc0 : dnz = 2, o_nz = 2
4056      proc1 : dnz = 3, o_nz = 2
4057      proc2 : dnz = 1, o_nz = 4
4058 .ve
4059    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4060    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4061    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4062    34 values.
4063 
4064    When d_nnz, o_nnz parameters are specified, the storage is specified
4065    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4066    In the above case the values for d_nnz,o_nnz are:
4067 .vb
4068      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4069      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4070      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4071 .ve
4072    Here the space allocated is the sum of all the above values, i.e., 34, and
4073    hence preallocation is perfect.
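
   As a sketch (comm and the variable names below are illustrative, not part of the example), proc0 of the
   8x8 example could preallocate its rows with

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};   /* proc0 owns rows 0-2 */
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,3,3,8,8);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve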
4074 
4075    Level: intermediate
4076 
4077 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4078           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4079 @*/
4080 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4081 {
4082   PetscErrorCode ierr;
4083 
4084   PetscFunctionBegin;
4085   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4086   PetscValidType(B,1);
4087   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4088   PetscFunctionReturn(0);
4089 }
4090 
4091 /*@
4092      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard
4093          CSR format.
4094 
4095    Collective
4096 
4097    Input Parameters:
4098 +  comm - MPI communicator
4099 .  m - number of local rows (Cannot be PETSC_DECIDE)
4100 .  n - This value should be the same as the local size used in creating the
4101        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4102        it calculated if N is given). For square matrices n is almost always m.
4103 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4104 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4105 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4106 .   j - column indices
4107 -   a - matrix values
4108 
4109    Output Parameter:
4110 .   mat - the matrix
4111 
4112    Level: intermediate
4113 
4114    Notes:
4115        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4116      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4117      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4118 
4119        The i and j indices are 0 based, and the i indices are offsets into the local j (and a) arrays.
4120
4121        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4122
4123        The format used for the sparse matrix input is equivalent to a
4124     row-major ordering, i.e., for the following matrix, the expected input data is
4125     as shown below:
4126 
4127 $        1 0 0
4128 $        2 0 3     P0
4129 $       -------
4130 $        4 5 6     P1
4131 $
4132 $     Process0 [P0]: rows_owned=[0,1]
4133 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4134 $        j =  {0,0,2}  [size = 3]
4135 $        v =  {1,2,3}  [size = 3]
4136 $
4137 $     Process1 [P1]: rows_owned=[2]
4138 $        i =  {0,3}    [size = nrow+1  = 1+1]
4139 $        j =  {0,1,2}  [size = 3]
4140 $        v =  {4,5,6}  [size = 3]
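
       As a sketch only (comm and the variable names are illustrative), process 0 of the layout above could
     create the matrix with

.vb
       PetscInt    i[] = {0,1,3}, j[] = {0,0,2};
       PetscScalar a[] = {1.0,2.0,3.0};
       Mat         A;
       ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,a,&A);CHKERRQ(ierr);
.ve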
4141 
4142 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4143           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4144 @*/
4145 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4146 {
4147   PetscErrorCode ierr;
4148 
4149   PetscFunctionBegin;
4150   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4151   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4152   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4153   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4154   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4155   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4156   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4157   PetscFunctionReturn(0);
4158 }
4159 
4160 /*@
4161      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in standard
4162          CSR format. Only the numerical values are updated; the other arrays must be identical to those used when the matrix was created
4163 
4164    Collective
4165 
4166    Input Parameters:
4167 +  mat - the matrix
4168 .  m - number of local rows (Cannot be PETSC_DECIDE)
4169 .  n - This value should be the same as the local size used in creating the
4170        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4171        it calculated if N is given). For square matrices n is almost always m.
4172 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4173 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4174 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4175 .  J - column indices
4176 -  v - matrix values
4177 
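   Notes:
     A minimal reuse sketch (assuming mat was created with MatCreateMPIAIJWithArrays() using the same Ii and J,
     and that only the numerical values in v have changed):

.vb
     ierr = MatCreateMPIAIJWithArrays(comm,m,n,M,N,Ii,J,v,&mat);CHKERRQ(ierr);
     /* ... modify the entries of v, keeping the sparsity pattern fixed ... */
     ierr = MatUpdateMPIAIJWithArrays(mat,m,n,M,N,Ii,J,v);CHKERRQ(ierr);
.ve
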
4178    Level: intermediate
4179 
4180 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4181           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4182 @*/
4183 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4184 {
4185   PetscErrorCode ierr;
4186   PetscInt       cstart,nnz,i,j;
4187   PetscInt       *ld;
4188   PetscBool      nooffprocentries;
4189   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4190   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4191   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4192   const PetscInt *Adi = Ad->i;
4193   PetscInt       ldi,Iii,md;
4194 
4195   PetscFunctionBegin;
4196   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4197   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4198   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4199   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4200 
4201   cstart = mat->cmap->rstart;
4202   if (!Aij->ld) {
4203     /* count number of entries below block diagonal */
4204     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4205     Aij->ld = ld;
4206     for (i=0; i<m; i++) {
4207       nnz  = Ii[i+1]- Ii[i];
4208       j     = 0;
4209       while (j < nnz && J[j] < cstart) {j++;} /* test j < nnz before reading J[j] to avoid reading past the end of the row */
4210       J    += nnz;
4211       ld[i] = j;
4212     }
4213   } else {
4214     ld = Aij->ld;
4215   }
4216 
4217   for (i=0; i<m; i++) {
4218     nnz  = Ii[i+1]- Ii[i];
4219     Iii  = Ii[i];
4220     ldi  = ld[i];
4221     md   = Adi[i+1]-Adi[i];
4222     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4223     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4224     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4225     ad  += md;
4226     ao  += nnz - md;
4227   }
4228   nooffprocentries      = mat->nooffprocentries;
4229   mat->nooffprocentries = PETSC_TRUE;
4230   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4231   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4232   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4233   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4234   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4235   mat->nooffprocentries = nooffprocentries;
4236   PetscFunctionReturn(0);
4237 }
4238 
4239 /*@C
4240    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4241    (the default parallel PETSc format).  For good matrix assembly performance
4242    the user should preallocate the matrix storage by setting the parameters
4243    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4244    performance can be increased by more than a factor of 50.
4245 
4246    Collective
4247 
4248    Input Parameters:
4249 +  comm - MPI communicator
4250 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4251            This value should be the same as the local size used in creating the
4252            y vector for the matrix-vector product y = Ax.
4253 .  n - This value should be the same as the local size used in creating the
4254        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
4255        it calculated if N is given). For square matrices n is almost always m.
4256 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4257 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4258 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4259            (same value is used for all local rows)
4260 .  d_nnz - array containing the number of nonzeros in the various rows of the
4261            DIAGONAL portion of the local submatrix (possibly different for each row)
4262            or NULL, if d_nz is used to specify the nonzero structure.
4263            The size of this array is equal to the number of local rows, i.e 'm'.
4264 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4265            submatrix (same value is used for all local rows).
4266 -  o_nnz - array containing the number of nonzeros in the various rows of the
4267            OFF-DIAGONAL portion of the local submatrix (possibly different for
4268            each row) or NULL, if o_nz is used to specify the nonzero
4269            structure. The size of this array is equal to the number
4270            of local rows, i.e 'm'.
4271 
4272    Output Parameter:
4273 .  A - the matrix
4274 
4275    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4276    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4277    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
4278 
4279    Notes:
4280    If the *_nnz parameter is given then the *_nz parameter is ignored
4281 
4282    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4283    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4284    storage requirements for this matrix.
4285 
4286    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one
4287    processor then it must be used on all processors that share the object for
4288    that argument.
4289 
4290    The user MUST specify either the local or global matrix dimensions
4291    (possibly both).
4292 
4293    The parallel matrix is partitioned across processors such that the
4294    first m0 rows belong to process 0, the next m1 rows belong to
4295    process 1, the next m2 rows belong to process 2, etc., where
4296    m0,m1,m2,... are the input parameter 'm', i.e., each processor stores
4297    values corresponding to an [m x N] submatrix.
4298 
4299    The columns are logically partitioned with the n0 columns belonging
4300    to the 0th partition, the next n1 columns belonging to the next
4301    partition, etc., where n0,n1,n2... are the input parameter 'n'.
4302 
4303    The DIAGONAL portion of the local submatrix on any given processor
4304    is the submatrix corresponding to the rows and columns m,n
4305    owned by the given processor, i.e. the diagonal matrix on
4306    process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1],
4307    etc. The remaining portion of the local submatrix [m x (N-n)]
4308    constitutes the OFF-DIAGONAL portion. The example below better
4309    illustrates this concept.
4310 
4311    For a square global matrix we define each processor's diagonal portion
4312    to be its local rows and the corresponding columns (a square submatrix);
4313    each processor's off-diagonal portion encompasses the remainder of the
4314    local matrix (a rectangular submatrix).
4315 
4316    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4317 
4318    When calling this routine with a single process communicator, a matrix of
4319    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4320    type of communicator, use the construction mechanism
4321 .vb
4322      MatCreate(...,&A);
4323      MatSetType(A,MATMPIAIJ);
4324      MatSetSizes(A, m,n,M,N);
4325      MatMPIAIJSetPreallocation(A,...);
4326 .ve
4327
4330    By default, this format uses inodes (identical nodes) when possible.
4331    We search for consecutive rows with the same nonzero structure, thereby
4332    reusing matrix information to achieve increased efficiency.
4333 
4334    Options Database Keys:
4335 +  -mat_no_inode  - Do not use inodes
4336 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4337 
4338    Example usage:
4339 
4340    Consider the following 8x8 matrix with 34 non-zero values, that is
4341    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4342    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4343    as follows
4344 
4345 .vb
4346             1  2  0  |  0  3  0  |  0  4
4347     Proc0   0  5  6  |  7  0  0  |  8  0
4348             9  0 10  | 11  0  0  | 12  0
4349     -------------------------------------
4350            13  0 14  | 15 16 17  |  0  0
4351     Proc1   0 18  0  | 19 20 21  |  0  0
4352             0  0  0  | 22 23  0  | 24  0
4353     -------------------------------------
4354     Proc2  25 26 27  |  0  0 28  | 29  0
4355            30  0  0  | 31 32 33  |  0 34
4356 .ve
4357 
4358    This can be represented as a collection of submatrices as
4359 
4360 .vb
4361       A B C
4362       D E F
4363       G H I
4364 .ve
4365 
4366    Where the submatrices A,B,C are owned by proc0, D,E,F are
4367    owned by proc1, G,H,I are owned by proc2.
4368 
4369    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4370    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4371    The 'M','N' parameters are 8,8, and have the same values on all procs.
4372 
4373    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4374    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4375    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4376    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4377    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4378    matrix, and [DF] as another SeqAIJ matrix.
4379 
4380    When d_nz, o_nz parameters are specified, d_nz storage elements are
4381    allocated for every row of the local diagonal submatrix, and o_nz
4382    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4383    One way to choose d_nz and o_nz is to use the maximum number of nonzeros over
4384    the local rows of the DIAGONAL and the OFF-DIAGONAL submatrices, respectively.
4385    In this case, the values of d_nz,o_nz are
4386 .vb
4387      proc0 : dnz = 2, o_nz = 2
4388      proc1 : dnz = 3, o_nz = 2
4389      proc2 : dnz = 1, o_nz = 4
4390 .ve
4391    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4392    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4393    for proc2, i.e., we are using 12+15+10=37 storage locations to store
4394    34 values.
4395 
4396    When d_nnz, o_nnz parameters are specified, the storage is specified
4397    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4398    In the above case the values for d_nnz,o_nnz are
4399 .vb
4400      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4401      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4402      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4403 .ve
4404    Here the space allocated is the sum of all the above values, i.e., 34, and
4405    hence preallocation is perfect.
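
   As a sketch (comm and the variable names are illustrative), proc0 of the 8x8 example above could call

.vb
     Mat      A;
     PetscInt d_nnz[] = {2,2,2}, o_nnz[] = {2,2,2};   /* proc0 owns rows 0-2 */
     ierr = MatCreateAIJ(comm,3,3,8,8,0,d_nnz,0,o_nnz,&A);CHKERRQ(ierr);
.ve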
4406 
4407    Level: intermediate
4408 
4409 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4410           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4411 @*/
4412 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4413 {
4414   PetscErrorCode ierr;
4415   PetscMPIInt    size;
4416 
4417   PetscFunctionBegin;
4418   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4419   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4420   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4421   if (size > 1) {
4422     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4423     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4424   } else {
4425     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4426     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4427   }
4428   PetscFunctionReturn(0);
4429 }
4430 
4431 /*@C
4432   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4433 
4434   Not collective
4435 
4436   Input Parameter:
4437 . A - The MPIAIJ matrix
4438 
4439   Output Parameters:
4440 + Ad - The local diagonal block as a SeqAIJ matrix
4441 . Ao - The local off-diagonal block as a SeqAIJ matrix
4442 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4443 
4444   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4445   in Ad are in [0, Nc) where Nc is the number of local columns. The columns in Ao are in [0, Nco), where Nco is
4446   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4447   local column numbers to global column numbers in the original matrix.
4448 
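  A small access sketch (A is an assembled MATMPIAIJ matrix; the variable names are illustrative):

.vb
    Mat            Ad,Ao;
    const PetscInt *colmap;
    ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
    /* local column c of Ao corresponds to global column colmap[c] of A */
.ve
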
4449   Level: intermediate
4450 
4451 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4452 @*/
4453 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4454 {
4455   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4456   PetscBool      flg;
4457   PetscErrorCode ierr;
4458 
4459   PetscFunctionBegin;
4460   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4461   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4462   if (Ad)     *Ad     = a->A;
4463   if (Ao)     *Ao     = a->B;
4464   if (colmap) *colmap = a->garray;
4465   PetscFunctionReturn(0);
4466 }
4467 
4468 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4469 {
4470   PetscErrorCode ierr;
4471   PetscInt       m,N,i,rstart,nnz,Ii;
4472   PetscInt       *indx;
4473   PetscScalar    *values;
4474 
4475   PetscFunctionBegin;
4476   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4477   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4478     PetscInt       *dnz,*onz,sum,bs,cbs;
4479 
4480     if (n == PETSC_DECIDE) {
4481       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4482     }
4483     /* Check sum(n) = N */
4484     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
4485     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4486 
4487     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
4488     rstart -= m;
4489 
4490     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4491     for (i=0; i<m; i++) {
4492       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4493       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4494       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4495     }
4496 
4497     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4498     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4499     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4500     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4501     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4502     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4503     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4504     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4505     ierr = MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
4506   }
4507 
4508   /* numeric phase */
4509   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4510   for (i=0; i<m; i++) {
4511     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4512     Ii   = i + rstart;
4513     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4514     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4515   }
4516   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4517   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4518   PetscFunctionReturn(0);
4519 }
4520 
4521 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4522 {
4523   PetscErrorCode    ierr;
4524   PetscMPIInt       rank;
4525   PetscInt          m,N,i,rstart,nnz;
4526   size_t            len;
4527   const PetscInt    *indx;
4528   PetscViewer       out;
4529   char              *name;
4530   Mat               B;
4531   const PetscScalar *values;
4532 
4533   PetscFunctionBegin;
4534   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
4535   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
4536   /* Should this be the type of the diagonal block of A? */
4537   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4538   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4539   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4540   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4541   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4542   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
4543   for (i=0; i<m; i++) {
4544     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4545     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4546     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4547   }
4548   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4549   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4550 
4551   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);
4552   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4553   ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
4554   ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
4555   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4556   ierr = PetscFree(name);CHKERRQ(ierr);
4557   ierr = MatView(B,out);CHKERRQ(ierr);
4558   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4559   ierr = MatDestroy(&B);CHKERRQ(ierr);
4560   PetscFunctionReturn(0);
4561 }
4562 
4563 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4564 {
4565   PetscErrorCode      ierr;
4566   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4567 
4568   PetscFunctionBegin;
4569   if (!merge) PetscFunctionReturn(0);
4570   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4571   ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4572   ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4573   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4574   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4575   ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4576   ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4577   ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4578   ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4579   ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4580   ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4581   ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4582   ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4583   ierr = PetscFree(merge);CHKERRQ(ierr);
4584   PetscFunctionReturn(0);
4585 }
4586 
4587 #include <../src/mat/utils/freespace.h>
4588 #include <petscbt.h>
4589 
4590 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4591 {
4592   PetscErrorCode      ierr;
4593   MPI_Comm            comm;
4594   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4595   PetscMPIInt         size,rank,taga,*len_s;
4596   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4597   PetscInt            proc,m;
4598   PetscInt            **buf_ri,**buf_rj;
4599   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4600   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4601   MPI_Request         *s_waits,*r_waits;
4602   MPI_Status          *status;
4603   MatScalar           *aa=a->a;
4604   MatScalar           **abuf_r,*ba_i;
4605   Mat_Merge_SeqsToMPI *merge;
4606   PetscContainer      container;
4607 
4608   PetscFunctionBegin;
4609   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4610   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4611 
4612   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4613   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4614 
4615   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4616   if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4617   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4618 
4619   bi     = merge->bi;
4620   bj     = merge->bj;
4621   buf_ri = merge->buf_ri;
4622   buf_rj = merge->buf_rj;
4623 
4624   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4625   owners = merge->rowmap->range;
4626   len_s  = merge->len_s;
4627 
4628   /* send and recv matrix values */
4629   /*-----------------------------*/
4630   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4631   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4632 
4633   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4634   for (proc=0,k=0; proc<size; proc++) {
4635     if (!len_s[proc]) continue;
4636     i    = owners[proc];
4637     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr);
4638     k++;
4639   }
4640 
4641   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);}
4642   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);}
4643   ierr = PetscFree(status);CHKERRQ(ierr);
4644 
4645   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4646   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4647 
4648   /* insert mat values of mpimat */
4649   /*----------------------------*/
4650   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4651   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4652 
4653   for (k=0; k<merge->nrecv; k++) {
4654     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4655     nrows       = *(buf_ri_k[k]);
4656     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4657     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4658   }
4659 
4660   /* set values of ba */
4661   m = merge->rowmap->n;
4662   for (i=0; i<m; i++) {
4663     arow = owners[rank] + i;
4664     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4665     bnzi = bi[i+1] - bi[i];
4666     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4667 
4668     /* add local non-zero vals of this proc's seqmat into ba */
4669     anzi   = ai[arow+1] - ai[arow];
4670     aj     = a->j + ai[arow];
4671     aa     = a->a + ai[arow];
4672     nextaj = 0;
4673     for (j=0; nextaj<anzi; j++) {
4674       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4675         ba_i[j] += aa[nextaj++];
4676       }
4677     }
4678 
4679     /* add received vals into ba */
4680     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4681       /* i-th row */
4682       if (i == *nextrow[k]) {
4683         anzi   = *(nextai[k]+1) - *nextai[k];
4684         aj     = buf_rj[k] + *(nextai[k]);
4685         aa     = abuf_r[k] + *(nextai[k]);
4686         nextaj = 0;
4687         for (j=0; nextaj<anzi; j++) {
4688           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4689             ba_i[j] += aa[nextaj++];
4690           }
4691         }
4692         nextrow[k]++; nextai[k]++;
4693       }
4694     }
4695     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4696   }
4697   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4698   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4699 
4700   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4701   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4702   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4703   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4704   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4705   PetscFunctionReturn(0);
4706 }
4707 
4708 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4709 {
4710   PetscErrorCode      ierr;
4711   Mat                 B_mpi;
4712   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4713   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4714   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4715   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4716   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4717   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4718   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4719   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4720   MPI_Status          *status;
4721   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4722   PetscBT             lnkbt;
4723   Mat_Merge_SeqsToMPI *merge;
4724   PetscContainer      container;
4725 
4726   PetscFunctionBegin;
4727   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4728 
4729   /* make sure it is a PETSc comm */
4730   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4731   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4732   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4733 
4734   ierr = PetscNew(&merge);CHKERRQ(ierr);
4735   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4736 
4737   /* determine row ownership */
4738   /*---------------------------------------------------------*/
4739   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4740   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4741   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4742   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4743   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4744   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4745   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4746 
4747   m      = merge->rowmap->n;
4748   owners = merge->rowmap->range;
4749 
4750   /* determine the number of messages to send, their lengths */
4751   /*---------------------------------------------------------*/
4752   len_s = merge->len_s;
4753 
4754   len          = 0; /* length of buf_si[] */
4755   merge->nsend = 0;
4756   for (proc=0; proc<size; proc++) {
4757     len_si[proc] = 0;
4758     if (proc == rank) {
4759       len_s[proc] = 0;
4760     } else {
4761       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4762       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of nonzeros to be sent to [proc] */
4763     }
4764     if (len_s[proc]) {
4765       merge->nsend++;
4766       nrows = 0;
4767       for (i=owners[proc]; i<owners[proc+1]; i++) {
4768         if (ai[i+1] > ai[i]) nrows++;
4769       }
4770       len_si[proc] = 2*(nrows+1);
4771       len         += len_si[proc];
4772     }
4773   }
4774 
4775   /* determine the number and length of messages to receive for ij-structure */
4776   /*-------------------------------------------------------------------------*/
4777   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4778   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4779 
4780   /* post the Irecv of j-structure */
4781   /*-------------------------------*/
4782   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4783   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4784 
4785   /* post the Isend of j-structure */
4786   /*--------------------------------*/
4787   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4788 
4789   for (proc=0, k=0; proc<size; proc++) {
4790     if (!len_s[proc]) continue;
4791     i    = owners[proc];
4792     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr);
4793     k++;
4794   }
4795 
4796   /* receives and sends of j-structure are complete */
4797   /*------------------------------------------------*/
4798   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);}
4799   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);}
4800 
4801   /* send and recv i-structure */
4802   /*---------------------------*/
4803   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4804   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4805 
4806   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4807   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4808   for (proc=0,k=0; proc<size; proc++) {
4809     if (!len_s[proc]) continue;
4810     /* form outgoing message for i-structure:
4811          buf_si[0]:                 nrows to be sent
4812                [1:nrows]:           row index (global)
4813                [nrows+1:2*nrows+1]: i-structure index
4814     */
4815     /*-------------------------------------------*/
4816     nrows       = len_si[proc]/2 - 1;
4817     buf_si_i    = buf_si + nrows+1;
4818     buf_si[0]   = nrows;
4819     buf_si_i[0] = 0;
4820     nrows       = 0;
4821     for (i=owners[proc]; i<owners[proc+1]; i++) {
4822       anzi = ai[i+1] - ai[i];
4823       if (anzi) {
4824         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4825         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4826         nrows++;
4827       }
4828     }
4829     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr);
4830     k++;
4831     buf_si += len_si[proc];
4832   }
4833 
4834   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);}
4835   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);}
4836 
4837   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4838   for (i=0; i<merge->nrecv; i++) {
4839     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4840   }
4841 
4842   ierr = PetscFree(len_si);CHKERRQ(ierr);
4843   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4844   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4845   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4846   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4847   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4848   ierr = PetscFree(status);CHKERRQ(ierr);
4849 
4850   /* compute a local seq matrix in each processor */
4851   /*----------------------------------------------*/
4852   /* allocate bi array and free space for accumulating nonzero column info */
4853   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4854   bi[0] = 0;
4855 
4856   /* create and initialize a linked list */
4857   nlnk = N+1;
4858   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4859 
4860   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4861   len  = ai[owners[rank+1]] - ai[owners[rank]];
4862   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4863 
4864   current_space = free_space;
4865 
4866   /* determine symbolic info for each local row */
4867   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4868 
4869   for (k=0; k<merge->nrecv; k++) {
4870     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4871     nrows       = *buf_ri_k[k];
4872     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4873     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4874   }
4875 
4876   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4877   len  = 0;
4878   for (i=0; i<m; i++) {
4879     bnzi = 0;
4880     /* add local non-zero cols of this proc's seqmat into lnk */
4881     arow  = owners[rank] + i;
4882     anzi  = ai[arow+1] - ai[arow];
4883     aj    = a->j + ai[arow];
4884     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4885     bnzi += nlnk;
4886     /* add received col data into lnk */
4887     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4888       if (i == *nextrow[k]) { /* i-th row */
4889         anzi  = *(nextai[k]+1) - *nextai[k];
4890         aj    = buf_rj[k] + *nextai[k];
4891         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4892         bnzi += nlnk;
4893         nextrow[k]++; nextai[k]++;
4894       }
4895     }
4896     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4897 
4898     /* if free space is not available, make more free space */
4899     if (current_space->local_remaining<bnzi) {
4900       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4901       nspacedouble++;
4902     }
4903     /* copy data into free space, then initialize lnk */
4904     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4905     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4906 
4907     current_space->array           += bnzi;
4908     current_space->local_used      += bnzi;
4909     current_space->local_remaining -= bnzi;
4910 
4911     bi[i+1] = bi[i] + bnzi;
4912   }
4913 
4914   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4915 
4916   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4917   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4918   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4919 
4920   /* create symbolic parallel matrix B_mpi */
4921   /*---------------------------------------*/
4922   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4923   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4924   if (n==PETSC_DECIDE) {
4925     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4926   } else {
4927     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4928   }
4929   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4930   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4931   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4932   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4933   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4934 
4935   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4936   B_mpi->assembled  = PETSC_FALSE;
4937   merge->bi         = bi;
4938   merge->bj         = bj;
4939   merge->buf_ri     = buf_ri;
4940   merge->buf_rj     = buf_rj;
4941   merge->coi        = NULL;
4942   merge->coj        = NULL;
4943   merge->owners_co  = NULL;
4944 
4945   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4946 
4947   /* attach the supporting struct to B_mpi for reuse */
4948   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4949   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4950   ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
4951   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4952   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4953   *mpimat = B_mpi;
4954 
4955   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4956   PetscFunctionReturn(0);
4957 }
4958 
4959 /*@C
4960       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4961                  matrices from each processor
4962 
4963     Collective
4964 
4965    Input Parameters:
4966 +    comm - the communicator the parallel matrix will live on
4967 .    seqmat - the input sequential matrix (one per process)
4968 .    m - number of local rows (or PETSC_DECIDE)
4969 .    n - number of local columns (or PETSC_DECIDE)
4970 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4971 
4972    Output Parameter:
4973 .    mpimat - the parallel matrix generated
4974 
4975     Level: advanced
4976 
4977    Notes:
4978      The dimensions of the sequential matrix in each processor MUST be the same.
4979      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4980      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
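
     A rough usage pattern (a sketch; each process builds a seqmat with the full global dimensions):

.vb
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&mpimat);CHKERRQ(ierr);
     /* ... change the numerical values of seqmat, keeping its nonzero pattern ... */
     ierr = MatCreateMPIAIJSumSeqAIJ(comm,seqmat,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&mpimat);CHKERRQ(ierr);
.ve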
4981 @*/
4982 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4983 {
4984   PetscErrorCode ierr;
4985   PetscMPIInt    size;
4986 
4987   PetscFunctionBegin;
4988   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4989   if (size == 1) {
4990     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4991     if (scall == MAT_INITIAL_MATRIX) {
4992       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4993     } else {
4994       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4995     }
4996     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4997     PetscFunctionReturn(0);
4998   }
4999   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5000   if (scall == MAT_INITIAL_MATRIX) {
5001     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5002   }
5003   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5004   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5005   PetscFunctionReturn(0);
5006 }
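
/*
   Example usage of MatCreateMPIAIJSumSeqAIJ() (a minimal hedged sketch, not taken from the PETSc
   test suite; Aloc and C are hypothetical names): every rank contributes its own SeqAIJ matrix,
   the symbolic phase is done once with MAT_INITIAL_MATRIX, and later value updates reuse the
   result with MAT_REUSE_MATRIX.

      Mat Aloc,C;
      ... on every rank build Aloc as a SeqAIJ matrix of the same dimensions and assemble it ...
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aloc,PETSC_DECIDE,PETSC_DECIDE,MAT_INITIAL_MATRIX,&C);CHKERRQ(ierr);
      ... change the numerical values of Aloc (same nonzero pattern) ...
      ierr = MatCreateMPIAIJSumSeqAIJ(PETSC_COMM_WORLD,Aloc,PETSC_DECIDE,PETSC_DECIDE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
*/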
5007 
5008 /*@
5009      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5010           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5011           with MatGetSize()
5012 
5013     Not Collective
5014 
5015    Input Parameters:
5016 +    A - the matrix
5017 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5018 
5019    Output Parameter:
5020 .    A_loc - the local sequential matrix generated
5021 
5022     Level: developer
5023 
5024    Notes:
5025      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5026      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5027      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5028      modify the values of the returned A_loc.
5029 
5030 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
5031 @*/
5032 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5033 {
5034   PetscErrorCode    ierr;
5035   Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
5036   Mat_SeqAIJ        *mat,*a,*b;
5037   PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5038   const PetscScalar *aa,*ba,*aav,*bav;
5039   PetscScalar       *ca,*cam;
5040   PetscMPIInt       size;
5041   PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5042   PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
5043   PetscBool         match;
5044 
5045   PetscFunctionBegin;
5046   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5047   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5048   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5049   if (size == 1) {
5050     if (scall == MAT_INITIAL_MATRIX) {
5051       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
5052       *A_loc = mpimat->A;
5053     } else if (scall == MAT_REUSE_MATRIX) {
5054       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5055     }
5056     PetscFunctionReturn(0);
5057   }
5058 
5059   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5060   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5061   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5062   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5063   ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
5064   ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
5065   aa   = aav;
5066   ba   = bav;
5067   if (scall == MAT_INITIAL_MATRIX) {
5068     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5069     ci[0] = 0;
5070     for (i=0; i<am; i++) {
5071       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5072     }
5073     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5074     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5075     k    = 0;
5076     for (i=0; i<am; i++) {
5077       ncols_o = bi[i+1] - bi[i];
5078       ncols_d = ai[i+1] - ai[i];
5079       /* off-diagonal portion of A */
5080       for (jo=0; jo<ncols_o; jo++) {
5081         col = cmap[*bj];
5082         if (col >= cstart) break;
5083         cj[k]   = col; bj++;
5084         ca[k++] = *ba++;
5085       }
5086       /* diagonal portion of A */
5087       for (j=0; j<ncols_d; j++) {
5088         cj[k]   = cstart + *aj++;
5089         ca[k++] = *aa++;
5090       }
5091       /* off-diagonal portion of A */
5092       for (j=jo; j<ncols_o; j++) {
5093         cj[k]   = cmap[*bj++];
5094         ca[k++] = *ba++;
5095       }
5096     }
5097     /* put together the new matrix */
5098     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5099     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5100     /* Since these are PETSc arrays, change flags to free them as necessary. */
5101     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5102     mat->free_a  = PETSC_TRUE;
5103     mat->free_ij = PETSC_TRUE;
5104     mat->nonew   = 0;
5105   } else if (scall == MAT_REUSE_MATRIX) {
5106     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5107 #if defined(PETSC_HAVE_DEVICE)
5108     (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU;
5109 #endif
5110     ci = mat->i; cj = mat->j; cam = mat->a;
5111     for (i=0; i<am; i++) {
5112       /* off-diagonal portion of A */
5113       ncols_o = bi[i+1] - bi[i];
5114       for (jo=0; jo<ncols_o; jo++) {
5115         col = cmap[*bj];
5116         if (col >= cstart) break;
5117         *cam++ = *ba++; bj++;
5118       }
5119       /* diagonal portion of A */
5120       ncols_d = ai[i+1] - ai[i];
5121       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5122       /* off-diagonal portion of A */
5123       for (j=jo; j<ncols_o; j++) {
5124         *cam++ = *ba++; bj++;
5125       }
5126     }
5127   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5128   ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
5129   ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
5130   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5131   PetscFunctionReturn(0);
5132 }
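
/*
   Example usage of MatMPIAIJGetLocalMat() (a hedged sketch; A is assumed to be an assembled
   MATMPIAIJ matrix and Aloc is a hypothetical name): the local rows are gathered once with
   MAT_INITIAL_MATRIX and refreshed later with MAT_REUSE_MATRIX after the values of A change.

      Mat Aloc;
      ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr);
      ... use Aloc, e.g. with MatGetRow()/MatRestoreRow() ...
      ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&Aloc);CHKERRQ(ierr);
      ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
*/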
5133 
5134 /*@
5135      MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5136           mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts
5137 
5138     Not Collective
5139 
5140    Input Parameters:
5141 +    A - the matrix
5142 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5143 
5144    Output Parameters:
5145 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5146 -    A_loc - the local sequential matrix generated
5147 
5148     Level: developer
5149 
5150    Notes:
5151      This is different from MatMPIAIJGetLocalMat() since the first columns in the returned matrix are those associated with the diagonal part, followed by those associated with the off-diagonal part (in its local ordering)
5152 
5153 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed()
5154 
5155 @*/
5156 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5157 {
5158   PetscErrorCode ierr;
5159   Mat            Ao,Ad;
5160   const PetscInt *cmap;
5161   PetscMPIInt    size;
5162   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5163 
5164   PetscFunctionBegin;
5165   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr);
5166   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5167   if (size == 1) {
5168     if (scall == MAT_INITIAL_MATRIX) {
5169       ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr);
5170       *A_loc = Ad;
5171     } else if (scall == MAT_REUSE_MATRIX) {
5172       ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5173     }
5174     if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); }
5175     PetscFunctionReturn(0);
5176   }
5177   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr);
5178   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5179   if (f) {
5180     ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr);
5181   } else {
5182     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5183     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5184     Mat_SeqAIJ        *c;
5185     PetscInt          *ai = a->i, *aj = a->j;
5186     PetscInt          *bi = b->i, *bj = b->j;
5187     PetscInt          *ci,*cj;
5188     const PetscScalar *aa,*ba;
5189     PetscScalar       *ca;
5190     PetscInt          i,j,am,dn,on;
5191 
5192     ierr = MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr);
5193     ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr);
5194     ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr);
5195     ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr);
5196     if (scall == MAT_INITIAL_MATRIX) {
5197       PetscInt k;
5198       ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5199       ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr);
5200       ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr);
5201       ci[0] = 0;
5202       for (i=0,k=0; i<am; i++) {
5203         const PetscInt ncols_o = bi[i+1] - bi[i];
5204         const PetscInt ncols_d = ai[i+1] - ai[i];
5205         ci[i+1] = ci[i] + ncols_o + ncols_d;
5206         /* diagonal portion of A */
5207         for (j=0; j<ncols_d; j++,k++) {
5208           cj[k] = *aj++;
5209           ca[k] = *aa++;
5210         }
5211         /* off-diagonal portion of A */
5212         for (j=0; j<ncols_o; j++,k++) {
5213           cj[k] = dn + *bj++;
5214           ca[k] = *ba++;
5215         }
5216       }
5217       /* put together the new matrix */
5218       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr);
5219       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5220       /* Since these are PETSc arrays, change flags to free them as necessary. */
5221       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5222       c->free_a  = PETSC_TRUE;
5223       c->free_ij = PETSC_TRUE;
5224       c->nonew   = 0;
5225       ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr);
5226     } else if (scall == MAT_REUSE_MATRIX) {
5227 #if defined(PETSC_HAVE_DEVICE)
5228       (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU;
5229 #endif
5230       c  = (Mat_SeqAIJ*)(*A_loc)->data;
5231       ca = c->a;
5232       for (i=0; i<am; i++) {
5233         const PetscInt ncols_d = ai[i+1] - ai[i];
5234         const PetscInt ncols_o = bi[i+1] - bi[i];
5235         /* diagonal portion of A */
5236         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5237         /* off-diagonal portion of A */
5238         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5239       }
5240     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5241     ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr);
5242     ierr = MatSeqAIJRestoreArrayRead(Ao,&ba);CHKERRQ(ierr);
5243     if (glob) {
5244       PetscInt cst, *gidx;
5245 
5246       ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr);
5247       ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr);
5248       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5249       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5250       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr);
5251     }
5252   }
5253   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5254   PetscFunctionReturn(0);
5255 }
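
/*
   Example usage of MatMPIAIJGetLocalMatMerge() (a hedged sketch with hypothetical names Aloc and
   glob): the returned matrix has its diagonal columns first and its off-diagonal columns after
   them, and glob translates those merged local columns back to global column indices.

      Mat Aloc;
      IS  glob;
      ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&Aloc);CHKERRQ(ierr);
      ... use Aloc together with the indices obtained from ISGetIndices(glob,...) ...
      ierr = ISDestroy(&glob);CHKERRQ(ierr);
      ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
*/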
5256 
5257 /*@C
5258      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5259 
5260     Not Collective
5261 
5262    Input Parameters:
5263 +    A - the matrix
5264 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5265 -    row, col - index sets of rows and columns to extract (or NULL)
5266 
5267    Output Parameter:
5268 .    A_loc - the local sequential matrix generated
5269 
5270     Level: developer
5271 
5272 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5273 
5274 @*/
5275 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5276 {
5277   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5278   PetscErrorCode ierr;
5279   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5280   IS             isrowa,iscola;
5281   Mat            *aloc;
5282   PetscBool      match;
5283 
5284   PetscFunctionBegin;
5285   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5286   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5287   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5288   if (!row) {
5289     start = A->rmap->rstart; end = A->rmap->rend;
5290     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5291   } else {
5292     isrowa = *row;
5293   }
5294   if (!col) {
5295     start = A->cmap->rstart;
5296     cmap  = a->garray;
5297     nzA   = a->A->cmap->n;
5298     nzB   = a->B->cmap->n;
5299     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5300     ncols = 0;
5301     for (i=0; i<nzB; i++) {
5302       if (cmap[i] < start) idx[ncols++] = cmap[i];
5303       else break;
5304     }
5305     imark = i;
5306     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5307     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5308     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5309   } else {
5310     iscola = *col;
5311   }
5312   if (scall != MAT_INITIAL_MATRIX) {
5313     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5314     aloc[0] = *A_loc;
5315   }
5316   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5317   if (!col) { /* attach global id of condensed columns */
5318     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5319   }
5320   *A_loc = aloc[0];
5321   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5322   if (!row) {
5323     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5324   }
5325   if (!col) {
5326     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5327   }
5328   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5329   PetscFunctionReturn(0);
5330 }
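
/*
   Example usage of MatMPIAIJGetLocalMatCondensed() (a hedged sketch; Aloc is a hypothetical
   name, and passing NULL for row and col lets the routine pick all local rows and only the
   nonzero columns):

      Mat Aloc;
      ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&Aloc);CHKERRQ(ierr);
      ... the global ids of the condensed columns are composed on Aloc under the name
          "_petsc_GetLocalMatCondensed_iscol" and can be retrieved with PetscObjectQuery() ...
      ierr = MatDestroy(&Aloc);CHKERRQ(ierr);
*/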
5331 
5332 /*
5333  * Create a sequential AIJ matrix based on row indices; all the columns of a row are extracted once the row is matched.
5334  * Rows can be local or remote. The routine is designed to be scalable in memory so that nothing is based
5335  * on a global size.
5336  * */
5337 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5338 {
5339   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5340   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5341   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5342   PetscMPIInt              owner;
5343   PetscSFNode              *iremote,*oiremote;
5344   const PetscInt           *lrowindices;
5345   PetscErrorCode           ierr;
5346   PetscSF                  sf,osf;
5347   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5348   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5349   MPI_Comm                 comm;
5350   ISLocalToGlobalMapping   mapping;
5351 
5352   PetscFunctionBegin;
5353   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5354   /* plocalsize is the number of roots
5355    * nrows is the number of leaves
5356    * */
5357   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5358   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5359   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5360   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5361   for (i=0;i<nrows;i++) {
5362     /* Find a remote index and an owner for a row
5363      * The row could be local or remote
5364      * */
5365     owner = 0;
5366     lidx  = 0;
5367     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5368     iremote[i].index = lidx;
5369     iremote[i].rank  = owner;
5370   }
5371   /* Create SF to communicate how many nonzero columns for each row */
5372   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5373   /* SF will figure out the number of nonzero columns for each row, and their
5374    * offsets
5375    * */
5376   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5377   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5378   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5379 
5380   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5381   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5382   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5383   roffsets[0] = 0;
5384   roffsets[1] = 0;
5385   for (i=0;i<plocalsize;i++) {
5386     /* diag */
5387     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5388     /* off diag */
5389     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5390     /* compute offsets so that we know the relative location of each row */
5391     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5392     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5393   }
5394   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5395   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5396   /* 'r' means root, and 'l' means leaf */
5397   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
5398   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
5399   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols,MPI_REPLACE);CHKERRQ(ierr);
5400   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets,MPI_REPLACE);CHKERRQ(ierr);
5401   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5402   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5403   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5404   dntotalcols = 0;
5405   ontotalcols = 0;
5406   ncol = 0;
5407   for (i=0;i<nrows;i++) {
5408     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5409     ncol = PetscMax(pnnz[i],ncol);
5410     /* diag */
5411     dntotalcols += nlcols[i*2+0];
5412     /* off diag */
5413     ontotalcols += nlcols[i*2+1];
5414   }
5415   /* We do not need to figure out the right number of columns
5416    * since all the calculations will be done by going through the raw data
5417    * */
5418   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5419   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5420   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5421   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5422   /* diag */
5423   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5424   /* off diag */
5425   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5426   /* diag */
5427   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5428   /* off diag */
5429   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5430   dntotalcols = 0;
5431   ontotalcols = 0;
5432   ntotalcols  = 0;
5433   for (i=0;i<nrows;i++) {
5434     owner = 0;
5435     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5436     /* Set iremote for diag matrix */
5437     for (j=0;j<nlcols[i*2+0];j++) {
5438       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5439       iremote[dntotalcols].rank    = owner;
5440       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of the memory */
5441       ilocal[dntotalcols++]        = ntotalcols++;
5442     }
5443     /* off diag */
5444     for (j=0;j<nlcols[i*2+1];j++) {
5445       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5446       oiremote[ontotalcols].rank    = owner;
5447       oilocal[ontotalcols++]        = ntotalcols++;
5448     }
5449   }
5450   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5451   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5452   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5453   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5454   /* P serves as roots and P_oth is leaves
5455    * Diag matrix
5456    * */
5457   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5458   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5459   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5460 
5461   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5462   /* Off diag */
5463   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5464   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5465   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5466   /* We operate on the matrix internal data for saving memory */
5467   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5468   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5469   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5470   /* Convert to global indices for diag matrix */
5471   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5472   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5473   /* We want P_oth to store global indices */
5474   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5475   /* Use memory scalable approach */
5476   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5477   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5478   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5479   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5480   /* Convert back to local indices */
5481   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5482   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j,MPI_REPLACE);CHKERRQ(ierr);
5483   nout = 0;
5484   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5485   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal nout %D",po->i[plocalsize],nout);
5486   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5487   /* Exchange values */
5488   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5489   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5490   /* Stop PETSc from shrinking memory */
5491   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5492   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5493   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5494   /* Attach PetscSF objects to P_oth so that we can reuse them later */
5495   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5496   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5497   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5498   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5499   PetscFunctionReturn(0);
5500 }
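
/*
   The routine above relies on the PetscSF "roots broadcast to leaves" idiom: the locally owned
   rows of P are the roots and every requested row (local or remote) is a leaf. A minimal hedged
   sketch of that idiom, with hypothetical array names (rootdata holds one PetscInt per owned
   row, leafdata receives one PetscInt per requested row described by iremote[]):

      PetscSF sf;
      ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
      ierr = PetscSFSetGraph(sf,nroots,nleaves,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
      ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
      ierr = PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE);CHKERRQ(ierr);
      ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
*/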
5501 
5502 /*
5503  * Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5504  * This supports MPIAIJ and MAIJ
5505  * */
5506 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5507 {
5508   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5509   Mat_SeqAIJ            *p_oth;
5510   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5511   IS                    rows,map;
5512   PetscHMapI            hamp;
5513   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5514   MPI_Comm              comm;
5515   PetscSF               sf,osf;
5516   PetscBool             has;
5517   PetscErrorCode        ierr;
5518 
5519   PetscFunctionBegin;
5520   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5521   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5522   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5523    *  and then create a submatrix (that often is an overlapping matrix)
5524    * */
5525   if (reuse == MAT_INITIAL_MATRIX) {
5526     /* Use a hash table to figure out unique keys */
5527     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5528     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5529     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5530     count = 0;
5531     /* Assume that a->garray is sorted; otherwise the following does not make sense */
5532     for (i=0;i<a->B->cmap->n;i++) {
5533       key  = a->garray[i]/dof;
5534       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5535       if (!has) {
5536         mapping[i] = count;
5537         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5538       } else {
5539         /* Current 'i' maps to the same key as the previous step */
5540         mapping[i] = count-1;
5541       }
5542     }
5543     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5544     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5545     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"Size of hash map %D is inconsistent with count %D",htsize,count);
5546     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5547     off = 0;
5548     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5549     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5550     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5551     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5552     /* In case the matrix was already created but the user wants to recreate it */
5553     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5554     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5555     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5556     ierr = ISDestroy(&map);CHKERRQ(ierr);
5557     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5558   } else if (reuse == MAT_REUSE_MATRIX) {
5559     /* If matrix was already created, we simply update values using SF objects
5560      * that were attached to the matrix earlier.
5561      *  */
5562     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5563     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5564     if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5565     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5566     /* Update values in place */
5567     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5568     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5569     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5570     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a,MPI_REPLACE);CHKERRQ(ierr);
5571   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5572   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5573   PetscFunctionReturn(0);
5574 }
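
/*
   The MAT_INITIAL_MATRIX branch above deduplicates the dof-blocked off-diagonal columns with a
   PetscHMapI. A minimal hedged sketch of that hash-map idiom (keys[] and n are hypothetical
   names):

      PetscHMapI ht;
      PetscInt   i,count = 0,sz;
      PetscBool  has;
      ierr = PetscHMapICreate(&ht);CHKERRQ(ierr);
      for (i=0; i<n; i++) {
        ierr = PetscHMapIHas(ht,keys[i],&has);CHKERRQ(ierr);
        if (!has) { ierr = PetscHMapISet(ht,keys[i],count++);CHKERRQ(ierr); }
      }
      ierr = PetscHMapIGetSize(ht,&sz);CHKERRQ(ierr); /* sz equals the number of unique keys */
      ierr = PetscHMapIDestroy(&ht);CHKERRQ(ierr);
*/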
5575 
5576 /*@C
5577     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5578 
5579     Collective on Mat
5580 
5581    Input Parameters:
5582 +    A,B - the matrices in mpiaij format
5583 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5584 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5585 
5586    Output Parameters:
5587 +    rowb, colb - index sets of rows and columns of B to extract
5588 -    B_seq - the sequential matrix generated
5589 
5590     Level: developer
5591 
5592 @*/
5593 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5594 {
5595   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5596   PetscErrorCode ierr;
5597   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5598   IS             isrowb,iscolb;
5599   Mat            *bseq=NULL;
5600 
5601   PetscFunctionBegin;
5602   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5603     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5604   }
5605   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5606 
5607   if (scall == MAT_INITIAL_MATRIX) {
5608     start = A->cmap->rstart;
5609     cmap  = a->garray;
5610     nzA   = a->A->cmap->n;
5611     nzB   = a->B->cmap->n;
5612     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5613     ncols = 0;
5614     for (i=0; i<nzB; i++) {  /* row < local row index */
5615       if (cmap[i] < start) idx[ncols++] = cmap[i];
5616       else break;
5617     }
5618     imark = i;
5619     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5620     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5621     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5622     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5623   } else {
5624     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5625     isrowb  = *rowb; iscolb = *colb;
5626     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5627     bseq[0] = *B_seq;
5628   }
5629   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5630   *B_seq = bseq[0];
5631   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5632   if (!rowb) {
5633     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5634   } else {
5635     *rowb = isrowb;
5636   }
5637   if (!colb) {
5638     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5639   } else {
5640     *colb = iscolb;
5641   }
5642   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5643   PetscFunctionReturn(0);
5644 }
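
/*
   Example usage of MatGetBrowsOfAcols() (a hedged sketch with hypothetical names Bseq, rowb and
   colb): the index sets created during the MAT_INITIAL_MATRIX call are returned to the caller so
   that a later MAT_REUSE_MATRIX call can reuse them.

      Mat Bseq;
      IS  rowb = NULL,colb = NULL;
      ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
      ... change the numerical values of B (same nonzero pattern) ...
      ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&Bseq);CHKERRQ(ierr);
      ierr = ISDestroy(&rowb);CHKERRQ(ierr);
      ierr = ISDestroy(&colb);CHKERRQ(ierr);
      ierr = MatDestroy(&Bseq);CHKERRQ(ierr);
*/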
5645 
5646 /*
5647     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5648     of the OFF-DIAGONAL portion of local A
5649 
5650     Collective on Mat
5651 
5652    Input Parameters:
5653 +    A,B - the matrices in mpiaij format
5654 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5655 
5656    Output Parameters:
5657 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5658 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5659 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5660 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5661 
5662     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5663      for this matrix. This is not desirable.
5664 
5665     Level: developer
5666 
5667 */
5668 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5669 {
5670   PetscErrorCode         ierr;
5671   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5672   Mat_SeqAIJ             *b_oth;
5673   VecScatter             ctx;
5674   MPI_Comm               comm;
5675   const PetscMPIInt      *rprocs,*sprocs;
5676   const PetscInt         *srow,*rstarts,*sstarts;
5677   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5678   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5679   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5680   MPI_Request            *reqs = NULL,*rwaits = NULL,*swaits = NULL;
5681   PetscMPIInt            size,tag,rank,nreqs;
5682 
5683   PetscFunctionBegin;
5684   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5685   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5686 
5687   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5688     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5689   }
5690   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5691   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
5692 
5693   if (size == 1) {
5694     if (startsj_s) *startsj_s = NULL;
5695     if (bufa_ptr)  *bufa_ptr  = NULL;
5696     *B_oth    = NULL;
5697     PetscFunctionReturn(0);
5698   }
5699 
5700   ctx = a->Mvctx;
5701   tag = ((PetscObject)ctx)->tag;
5702 
5703   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5704   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5705   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5706   ierr = PetscMPIIntCast(nsends+nrecvs,&nreqs);CHKERRQ(ierr);
5707   ierr = PetscMalloc1(nreqs,&reqs);CHKERRQ(ierr);
5708   rwaits = reqs;
5709   swaits = reqs + nrecvs;
5710 
5711   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5712   if (scall == MAT_INITIAL_MATRIX) {
5713     /* i-array */
5714     /*---------*/
5715     /*  post receives */
5716     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5717     for (i=0; i<nrecvs; i++) {
5718       rowlen = rvalues + rstarts[i]*rbs;
5719       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5720       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5721     }
5722 
5723     /* pack the outgoing message */
5724     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5725 
5726     sstartsj[0] = 0;
5727     rstartsj[0] = 0;
5728     len         = 0; /* total length of j or a array to be sent */
5729     if (nsends) {
5730       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5731       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5732     }
5733     for (i=0; i<nsends; i++) {
5734       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5735       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5736       for (j=0; j<nrows; j++) {
5737         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5738         for (l=0; l<sbs; l++) {
5739           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5740 
5741           rowlen[j*sbs+l] = ncols;
5742 
5743           len += ncols;
5744           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5745         }
5746         k++;
5747       }
5748       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5749 
5750       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5751     }
5752     /* recvs and sends of i-array are completed */
5753     if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5754     ierr = PetscFree(svalues);CHKERRQ(ierr);
5755 
5756     /* allocate buffers for sending j and a arrays */
5757     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5758     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5759 
5760     /* create i-array of B_oth */
5761     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5762 
5763     b_othi[0] = 0;
5764     len       = 0; /* total length of j or a array to be received */
5765     k         = 0;
5766     for (i=0; i<nrecvs; i++) {
5767       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5768       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5769       for (j=0; j<nrows; j++) {
5770         b_othi[k+1] = b_othi[k] + rowlen[j];
5771         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5772         k++;
5773       }
5774       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5775     }
5776     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5777 
5778     /* allocate space for the j and a arrays of B_oth */
5779     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5780     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5781 
5782     /* j-array */
5783     /*---------*/
5784     /*  post receives of j-array */
5785     for (i=0; i<nrecvs; i++) {
5786       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5787       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5788     }
5789 
5790     /* pack the outgoing message j-array */
5791     if (nsends) k = sstarts[0];
5792     for (i=0; i<nsends; i++) {
5793       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5794       bufJ  = bufj+sstartsj[i];
5795       for (j=0; j<nrows; j++) {
5796         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5797         for (ll=0; ll<sbs; ll++) {
5798           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5799           for (l=0; l<ncols; l++) {
5800             *bufJ++ = cols[l];
5801           }
5802           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5803         }
5804       }
5805       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5806     }
5807 
5808     /* recvs and sends of j-array are completed */
5809     if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5810   } else if (scall == MAT_REUSE_MATRIX) {
5811     sstartsj = *startsj_s;
5812     rstartsj = *startsj_r;
5813     bufa     = *bufa_ptr;
5814     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5815     b_otha   = b_oth->a;
5816 #if defined(PETSC_HAVE_DEVICE)
5817     (*B_oth)->offloadmask = PETSC_OFFLOAD_CPU;
5818 #endif
5819   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5820 
5821   /* a-array */
5822   /*---------*/
5823   /*  post receives of a-array */
5824   for (i=0; i<nrecvs; i++) {
5825     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5826     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRMPI(ierr);
5827   }
5828 
5829   /* pack the outgoing message a-array */
5830   if (nsends) k = sstarts[0];
5831   for (i=0; i<nsends; i++) {
5832     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5833     bufA  = bufa+sstartsj[i];
5834     for (j=0; j<nrows; j++) {
5835       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5836       for (ll=0; ll<sbs; ll++) {
5837         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5838         for (l=0; l<ncols; l++) {
5839           *bufA++ = vals[l];
5840         }
5841         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5842       }
5843     }
5844     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5845   }
5846   /* recvs and sends of a-array are completed */
5847   if (nreqs) {ierr = MPI_Waitall(nreqs,reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5848   ierr = PetscFree(reqs);CHKERRQ(ierr);
5849 
5850   if (scall == MAT_INITIAL_MATRIX) {
5851     /* put together the new matrix */
5852     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5853 
5854     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5855     /* Since these are PETSc arrays, change flags to free them as necessary. */
5856     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5857     b_oth->free_a  = PETSC_TRUE;
5858     b_oth->free_ij = PETSC_TRUE;
5859     b_oth->nonew   = 0;
5860 
5861     ierr = PetscFree(bufj);CHKERRQ(ierr);
5862     if (!startsj_s || !bufa_ptr) {
5863       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5864       ierr = PetscFree(bufa_ptr);CHKERRQ(ierr);
5865     } else {
5866       *startsj_s = sstartsj;
5867       *startsj_r = rstartsj;
5868       *bufa_ptr  = bufa;
5869     }
5870   }
5871 
5872   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5873   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5874   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5875   PetscFunctionReturn(0);
5876 }
5877 
5878 /*@C
5879   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5880 
5881   Not Collective
5882 
5883   Input Parameter:
5884 . A - The matrix in mpiaij format
5885 
5886   Output Parameters:
5887 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5888 . colmap - A map from global column index to local index into lvec
5889 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5890 
5891   Level: developer
5892 
5893 @*/
5894 #if defined(PETSC_USE_CTABLE)
5895 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5896 #else
5897 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5898 #endif
5899 {
5900   Mat_MPIAIJ *a;
5901 
5902   PetscFunctionBegin;
5903   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5904   PetscValidPointer(lvec, 2);
5905   PetscValidPointer(colmap, 3);
5906   PetscValidPointer(multScatter, 4);
5907   a = (Mat_MPIAIJ*) A->data;
5908   if (lvec) *lvec = a->lvec;
5909   if (colmap) *colmap = a->colmap;
5910   if (multScatter) *multScatter = a->Mvctx;
5911   PetscFunctionReturn(0);
5912 }
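
/*
   Example usage of MatGetCommunicationStructs() (a hedged sketch; it assumes a PETSc build
   without PETSC_USE_CTABLE so that colmap is a plain PetscInt array; with PETSC_USE_CTABLE the
   second output would be a PetscTable instead):

      Vec        lvec;
      PetscInt   *colmap;
      VecScatter Mvctx;
      ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
      ... lvec, colmap, and Mvctx are owned by A and must not be destroyed by the caller ...
*/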
5913 
5914 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5915 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5916 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5917 #if defined(PETSC_HAVE_MKL_SPARSE)
5918 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5919 #endif
5920 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5921 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5922 #if defined(PETSC_HAVE_ELEMENTAL)
5923 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5924 #endif
5925 #if defined(PETSC_HAVE_SCALAPACK)
5926 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5927 #endif
5928 #if defined(PETSC_HAVE_HYPRE)
5929 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5930 #endif
5931 #if defined(PETSC_HAVE_CUDA)
5932 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5933 #endif
5934 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5935 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5936 #endif
5937 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5938 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5939 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5940 
5941 /*
5942     Computes (B'*A')' since computing A*B directly is untenable
5943 
5944                n                       p                          p
5945         [             ]       [             ]         [                 ]
5946       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5947         [             ]       [             ]         [                 ]
5948 
5949 */
5950 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5951 {
5952   PetscErrorCode ierr;
5953   Mat            At,Bt,Ct;
5954 
5955   PetscFunctionBegin;
5956   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5957   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5958   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
5959   ierr = MatDestroy(&At);CHKERRQ(ierr);
5960   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5961   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5962   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5963   PetscFunctionReturn(0);
5964 }
5965 
5966 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5967 {
5968   PetscErrorCode ierr;
5969   PetscBool      cisdense;
5970 
5971   PetscFunctionBegin;
5972   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %D != B->rmap->n %D",A->cmap->n,B->rmap->n);
5973   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
5974   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
5975   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
5976   if (!cisdense) {
5977     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
5978   }
5979   ierr = MatSetUp(C);CHKERRQ(ierr);
5980 
5981   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5982   PetscFunctionReturn(0);
5983 }
5984 
5985 /* ----------------------------------------------------------------*/
5986 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
5987 {
5988   Mat_Product *product = C->product;
5989   Mat         A = product->A,B=product->B;
5990 
5991   PetscFunctionBegin;
5992   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
5993     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5994 
5995   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
5996   C->ops->productsymbolic = MatProductSymbolic_AB;
5997   PetscFunctionReturn(0);
5998 }
5999 
6000 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6001 {
6002   PetscErrorCode ierr;
6003   Mat_Product    *product = C->product;
6004 
6005   PetscFunctionBegin;
6006   if (product->type == MATPRODUCT_AB) {
6007     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
6008   }
6009   PetscFunctionReturn(0);
6010 }
6011 /* ----------------------------------------------------------------*/
6012 
6013 /*MC
6014    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6015 
6016    Options Database Keys:
6017 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6018 
6019    Level: beginner
6020 
6021    Notes:
6022     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
6023     in this case the values associated with the rows and columns one passes in are set to zero
6024     in the matrix
6025 
6026     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
6027     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6028 
6029 .seealso: MatCreateAIJ()
6030 M*/
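
/*
   A hedged sketch of the MatSetValues()-with-NULL-values usage mentioned in the notes above
   (the index arrays rows[] and cols[] are hypothetical): the listed locations are inserted into
   the nonzero pattern with the value zero.

      ierr = MatSetValues(A,nrows,rows,ncols,cols,NULL,INSERT_VALUES);CHKERRQ(ierr);
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/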
6031 
6032 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6033 {
6034   Mat_MPIAIJ     *b;
6035   PetscErrorCode ierr;
6036   PetscMPIInt    size;
6037 
6038   PetscFunctionBegin;
6039   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
6040 
6041   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
6042   B->data       = (void*)b;
6043   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
6044   B->assembled  = PETSC_FALSE;
6045   B->insertmode = NOT_SET_VALUES;
6046   b->size       = size;
6047 
6048   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);
6049 
6050   /* build cache for off array entries formed */
6051   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
6052 
6053   b->donotstash  = PETSC_FALSE;
6054   b->colmap      = NULL;
6055   b->garray      = NULL;
6056   b->roworiented = PETSC_TRUE;
6057 
6058   /* stuff used for matrix vector multiply */
6059   b->lvec  = NULL;
6060   b->Mvctx = NULL;
6061 
6062   /* stuff for MatGetRow() */
6063   b->rowindices   = NULL;
6064   b->rowvalues    = NULL;
6065   b->getrowactive = PETSC_FALSE;
6066 
6067   /* flexible pointer used in CUSPARSE classes */
6068   b->spptr = NULL;
6069 
6070   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6071   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6072   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6073   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6074   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6075   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6076   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6077   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6078   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6079   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6080 #if defined(PETSC_HAVE_CUDA)
6081   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr);
6082 #endif
6083 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6084   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr);
6085 #endif
6086 #if defined(PETSC_HAVE_MKL_SPARSE)
6087   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6088 #endif
6089   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6090   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
6091   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6092   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense);CHKERRQ(ierr);
6093 #if defined(PETSC_HAVE_ELEMENTAL)
6094   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6095 #endif
6096 #if defined(PETSC_HAVE_SCALAPACK)
6097   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
6098 #endif
6099   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6100   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6101 #if defined(PETSC_HAVE_HYPRE)
6102   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6103   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6104 #endif
6105   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
6106   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
6107   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6108   PetscFunctionReturn(0);
6109 }
6110 
6111 /*@C
6112      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6113          and "off-diagonal" part of the matrix in CSR format.
6114 
6115    Collective
6116 
6117    Input Parameters:
6118 +  comm - MPI communicator
6119 .  m - number of local rows (Cannot be PETSC_DECIDE)
6120 .  n - This value should be the same as the local size used in creating the
6121        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
6122        it calculated if N is given). For square matrices n is almost always m.
6123 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
6124 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
6125 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6126 .   j - column indices
6127 .   a - matrix values
6128 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6129 .   oj - column indices
6130 -   oa - matrix values
6131 
6132    Output Parameter:
6133 .   mat - the matrix
6134 
6135    Level: advanced
6136 
6137    Notes:
6138        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6139        must free the arrays once the matrix has been destroyed and not before.
6140 
6141        The i and j indices are 0 based
6142 
6143        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6144 
6145        This sets local rows and cannot be used to set off-processor values.
6146 
6147        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6148        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6149        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6150        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6151        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6152        communication if it is known that only local entries will be set.
6153 
6154 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6155           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6156 @*/
6157 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6158 {
6159   PetscErrorCode ierr;
6160   Mat_MPIAIJ     *maij;
6161 
6162   PetscFunctionBegin;
6163   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6164   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6165   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6166   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6167   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6168   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6169   maij = (Mat_MPIAIJ*) (*mat)->data;
6170 
6171   (*mat)->preallocated = PETSC_TRUE;
6172 
6173   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6174   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6175 
6176   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6177   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6178 
6179   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6180   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6181   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6182   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6183 
6184   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6185   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6186   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6187   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6188   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6189   PetscFunctionReturn(0);
6190 }
6191 
6192 /*
6193     Special version for direct calls from Fortran
6194 */
6195 #include <petsc/private/fortranimpl.h>
6196 
6197 /* Change these macros so they can be used in a void function */
6198 #undef CHKERRQ
6199 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6200 #undef SETERRQ2
6201 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6202 #undef SETERRQ3
6203 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6204 #undef SETERRQ
6205 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6206 
6207 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6208 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6209 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6210 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6211 #else
6212 #endif
6213 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6214 {
6215   Mat            mat  = *mmat;
6216   PetscInt       m    = *mm, n = *mn;
6217   InsertMode     addv = *maddv;
6218   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6219   PetscScalar    value;
6220   PetscErrorCode ierr;
6221 
6222   MatCheckPreallocated(mat,1);
6223   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6224   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6225   {
6226     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6227     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6228     PetscBool roworiented = aij->roworiented;
6229 
6230     /* Some Variables required in the macro */
6231     Mat        A                    = aij->A;
6232     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6233     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6234     MatScalar  *aa                  = a->a;
6235     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6236     Mat        B                    = aij->B;
6237     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6238     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6239     MatScalar  *ba                  = b->a;
6240     /* The variable below is only needed for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6241      * cannot use "#if defined" inside a macro. */
6242     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6243 
6244     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6245     PetscInt  nonew = a->nonew;
6246     MatScalar *ap1,*ap2;
6247 
6248     PetscFunctionBegin;
6249     for (i=0; i<m; i++) {
6250       if (im[i] < 0) continue;
6251       if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6252       if (im[i] >= rstart && im[i] < rend) {
6253         row      = im[i] - rstart;
6254         lastcol1 = -1;
6255         rp1      = aj + ai[row];
6256         ap1      = aa + ai[row];
6257         rmax1    = aimax[row];
6258         nrow1    = ailen[row];
6259         low1     = 0;
6260         high1    = nrow1;
6261         lastcol2 = -1;
6262         rp2      = bj + bi[row];
6263         ap2      = ba + bi[row];
6264         rmax2    = bimax[row];
6265         nrow2    = bilen[row];
6266         low2     = 0;
6267         high2    = nrow2;
6268 
6269         for (j=0; j<n; j++) {
6270           if (roworiented) value = v[i*n+j];
6271           else value = v[i+j*m];
6272           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6273           if (in[j] >= cstart && in[j] < cend) {
6274             col = in[j] - cstart;
6275             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6276 #if defined(PETSC_HAVE_DEVICE)
6277             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6278 #endif
6279           } else if (in[j] < 0) continue;
6280           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6281             /* extra brace on SETERRQ2() is required for --with-errorchecking=0 - due to the next 'else' clause */
6282             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6283           } else {
6284             if (mat->was_assembled) {
6285               if (!aij->colmap) {
6286                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6287               }
6288 #if defined(PETSC_USE_CTABLE)
6289               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6290               col--;
6291 #else
6292               col = aij->colmap[in[j]] - 1;
6293 #endif
6294               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6295                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6296                 col  =  in[j];
6297                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6298                 B        = aij->B;
6299                 b        = (Mat_SeqAIJ*)B->data;
6300                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6301                 ba       = b->a; /* refresh ba before computing ap2 below: MatDisAssemble_MPIAIJ() replaced B's value array */
6302                 rp2      = bj + bi[row];
6303                 ap2      = ba + bi[row];
6304                 rmax2    = bimax[row];
6305                 nrow2    = bilen[row];
6306                 low2     = 0;
6307                 high2    = nrow2;
6308                 bm       = aij->B->rmap->n;
6309                 inserted = PETSC_FALSE;
6310               }
6311             } else col = in[j];
6312             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6313 #if defined(PETSC_HAVE_DEVICE)
6314             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6315 #endif
6316           }
6317         }
6318       } else if (!aij->donotstash) {
6319         if (roworiented) {
6320           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6321         } else {
6322           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6323         }
6324       }
6325     }
6326   }
6327   PetscFunctionReturnVoid();
6328 }
6329 
6330 typedef struct {
6331   Mat       *mp;    /* intermediate products */
6332   PetscBool *mptmp; /* is the intermediate product temporary? */
6333   PetscInt  cp;     /* number of intermediate products */
6334 
6335   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
6336   PetscInt    *startsj_s,*startsj_r;
6337   PetscScalar *bufa;
6338   Mat         P_oth;
6339 
6340   /* may take advantage of merging product->B */
6341   Mat Bloc; /* B-local by merging diag and off-diag */
6342 
6343   /* cuSPARSE does not support splitting the symbolic and numeric phases.
6344      When api_user is true, we don't need to update the numerical values
6345      of the temporary storage */
6346   PetscBool reusesym;
6347 
6348   /* support for COO values insertion */
6349   PetscScalar  *coo_v,*coo_w; /* coo_v stores on-process COO scalars and is the MPI recv buffer; coo_w stores off-process COO scalars and is the MPI send buffer */
6350   PetscInt     **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */
6351   PetscInt     **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */
6352   PetscBool    hasoffproc; /* if true, values are inserted on other processes (i.e. AtB or PtAP) */
6353   PetscSF      sf; /* used for non-local values insertion and memory malloc */
6354   PetscMemType mtype;
6355 
6356   /* customization */
6357   PetscBool abmerge;
6358   PetscBool P_oth_bind;
6359 } MatMatMPIAIJBACKEND;
6360 
6361 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6362 {
6363   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6364   PetscInt            i;
6365   PetscErrorCode      ierr;
6366 
6367   PetscFunctionBegin;
6368   ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr);
6369   ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr);
6370   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr);
6371   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr);
6372   ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr);
6373   ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr);
6374   ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr);
6375   for (i = 0; i < mmdata->cp; i++) {
6376     ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr);
6377   }
6378   ierr = PetscFree2(mmdata->mp,mmdata->mptmp);CHKERRQ(ierr);
6379   ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr);
6380   ierr = PetscFree(mmdata->own);CHKERRQ(ierr);
6381   ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr);
6382   ierr = PetscFree(mmdata->off);CHKERRQ(ierr);
6383   ierr = PetscFree(mmdata);CHKERRQ(ierr);
6384   PetscFunctionReturn(0);
6385 }
6386 
6387 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6388 {
6389   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6390   PetscErrorCode ierr;
6391 
6392   PetscFunctionBegin;
6393   ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr);
6394   if (f) {
6395     ierr = (*f)(A,n,idx,v);CHKERRQ(ierr);
6396   } else {
6397     const PetscScalar *vv;
6398 
6399     ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr);
6400     if (n && idx) {
6401       PetscScalar    *w = v;
6402       const PetscInt *oi = idx;
6403       PetscInt       j;
6404 
6405       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6406     } else {
6407       ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr);
6408     }
6409     ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr);
6410   }
6411   PetscFunctionReturn(0);
6412 }
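
/* Illustration (hypothetical values): for a SeqAIJ matrix whose value array is {10,20,30,40} and idx = {3,0},
   MatSeqAIJCopySubArray(A,2,idx,v) above yields v = {40,10}; with idx == NULL it copies the first n values verbatim. */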
6413 
6414 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
6415 {
6416   MatMatMPIAIJBACKEND *mmdata;
6417   PetscInt            i,n_d,n_o;
6418   PetscErrorCode      ierr;
6419 
6420   PetscFunctionBegin;
6421   MatCheckProduct(C,1);
6422   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
6423   mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
6424   if (!mmdata->reusesym) { /* update temporary matrices */
6425     if (mmdata->P_oth) {
6426       ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6427     }
6428     if (mmdata->Bloc) {
6429       ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr);
6430     }
6431   }
6432   mmdata->reusesym = PETSC_FALSE;
6433 
6434   for (i = 0; i < mmdata->cp; i++) {
6435     if (!mmdata->mp[i]->ops->productnumeric) SETERRQ1(PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
6436     ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr);
6437   }
6438   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
6439     PetscInt noff = mmdata->off[i+1] - mmdata->off[i];
6440 
6441     if (mmdata->mptmp[i]) continue;
6442     if (noff) {
6443       PetscInt nown = mmdata->own[i+1] - mmdata->own[i];
6444 
6445       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr);
6446       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr);
6447       n_o += noff;
6448       n_d += nown;
6449     } else {
6450       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;
6451 
6452       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr);
6453       n_d += mm->nz;
6454     }
6455   }
6456   if (mmdata->hasoffproc) { /* offprocess insertion */
6457     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
6458     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
6459   }
6460   ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr);
6461   PetscFunctionReturn(0);
6462 }
6463 
6464 /* Support for Pt * A, A * P, or Pt * A * P */
6465 #define MAX_NUMBER_INTERMEDIATE 4
6466 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6467 {
6468   Mat_Product            *product = C->product;
6469   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
6470   Mat_MPIAIJ             *a,*p;
6471   MatMatMPIAIJBACKEND    *mmdata;
6472   ISLocalToGlobalMapping P_oth_l2g = NULL;
6473   IS                     glob = NULL;
6474   const char             *prefix;
6475   char                   pprefix[256];
6476   const PetscInt         *globidx,*P_oth_idx;
6477   PetscInt               i,j,cp,m,n,M,N,ncoo,ncoo_d,ncoo_o,ncoo_oown,*coo_i,*coo_j;
6478   PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */
6479                                                                                         /* type-0: consecutive, start from 0; type-1: consecutive with */
6480                                                                                         /* a base offset; type-2: sparse with a local to global map table */
6481   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */
6482 
6483   MatProductType         ptype;
6484   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
6485   PetscMPIInt            size;
6486   PetscErrorCode         ierr;
6487 
6488   PetscFunctionBegin;
6489   MatCheckProduct(C,1);
6490   if (product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
6491   ptype = product->type;
6492   if (product->A->symmetric && ptype == MATPRODUCT_AtB) ptype = MATPRODUCT_AB;
6493   switch (ptype) {
6494   case MATPRODUCT_AB:
6495     A = product->A;
6496     P = product->B;
6497     m = A->rmap->n;
6498     n = P->cmap->n;
6499     M = A->rmap->N;
6500     N = P->cmap->N;
6501     hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
6502     break;
6503   case MATPRODUCT_AtB:
6504     P = product->A;
6505     A = product->B;
6506     m = P->cmap->n;
6507     n = A->cmap->n;
6508     M = P->cmap->N;
6509     N = A->cmap->N;
6510     hasoffproc = PETSC_TRUE;
6511     break;
6512   case MATPRODUCT_PtAP:
6513     A = product->A;
6514     P = product->B;
6515     m = P->cmap->n;
6516     n = P->cmap->n;
6517     M = P->cmap->N;
6518     N = P->cmap->N;
6519     hasoffproc = PETSC_TRUE;
6520     break;
6521   default:
6522     SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6523   }
6524   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRMPI(ierr);
6525   if (size == 1) hasoffproc = PETSC_FALSE;
6526 
6527   /* defaults */
6528   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
6529     mp[i]    = NULL;
6530     mptmp[i] = PETSC_FALSE;
6531     rmapt[i] = -1;
6532     cmapt[i] = -1;
6533     rmapa[i] = NULL;
6534     cmapa[i] = NULL;
6535   }
6536 
6537   /* customization */
6538   ierr = PetscNew(&mmdata);CHKERRQ(ierr);
6539   mmdata->reusesym = product->api_user;
6540   if (ptype == MATPRODUCT_AB) {
6541     if (product->api_user) {
6542       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
6543       ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
6544       ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6545       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6546     } else {
6547       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
6548       ierr = PetscOptionsBool("-matproduct_ab_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
6549       ierr = PetscOptionsBool("-matproduct_ab_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6550       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6551     }
6552   } else if (ptype == MATPRODUCT_PtAP) {
6553     if (product->api_user) {
6554       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
6555       ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6556       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6557     } else {
6558       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
6559       ierr = PetscOptionsBool("-matproduct_ptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6560       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6561     }
6562   }
6563   a = (Mat_MPIAIJ*)A->data;
6564   p = (Mat_MPIAIJ*)P->data;
6565   ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr);
6566   ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr);
6567   ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr);
6568   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
6569   ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr);
6570 
6571   cp   = 0;
6572   switch (ptype) {
6573   case MATPRODUCT_AB: /* A * P */
6574     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6575 
6576     /* A_diag * P_local (merged or not) */
6577     if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
6578       /* P is product->B */
6579       ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6580       ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6581       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6582       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6583       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6584       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6585       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6586       mp[cp]->product->api_user = product->api_user;
6587       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6588       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6589       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6590       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6591       rmapt[cp] = 1;
6592       cmapt[cp] = 2;
6593       cmapa[cp] = globidx;
6594       mptmp[cp] = PETSC_FALSE;
6595       cp++;
6596     } else { /* A_diag * P_diag and A_diag * P_off */
6597       ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr);
6598       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6599       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6600       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6601       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6602       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6603       mp[cp]->product->api_user = product->api_user;
6604       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6605       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6606       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6607       rmapt[cp] = 1;
6608       cmapt[cp] = 1;
6609       mptmp[cp] = PETSC_FALSE;
6610       cp++;
6611       ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr);
6612       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6613       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6614       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6615       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6616       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6617       mp[cp]->product->api_user = product->api_user;
6618       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6619       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6620       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6621       rmapt[cp] = 1;
6622       cmapt[cp] = 2;
6623       cmapa[cp] = p->garray;
6624       mptmp[cp] = PETSC_FALSE;
6625       cp++;
6626     }
6627 
6628     /* A_off * P_other */
6629     if (mmdata->P_oth) {
6630       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr); /* make P_oth use local col ids */
6631       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6632       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
6633       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
6634       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
6635       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6636       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6637       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6638       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6639       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6640       mp[cp]->product->api_user = product->api_user;
6641       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6642       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6643       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6644       rmapt[cp] = 1;
6645       cmapt[cp] = 2;
6646       cmapa[cp] = P_oth_idx;
6647       mptmp[cp] = PETSC_FALSE;
6648       cp++;
6649     }
6650     break;
6651 
6652   case MATPRODUCT_AtB: /* (P^t * A): P_diag^t * A_loc and P_off^t * A_loc */
6653     /* A is product->B */
6654     ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6655     if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
6656       ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6657       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6658       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6659       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6660       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6661       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6662       mp[cp]->product->api_user = product->api_user;
6663       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6664       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6665       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6666       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6667       rmapt[cp] = 2;
6668       rmapa[cp] = globidx;
6669       cmapt[cp] = 2;
6670       cmapa[cp] = globidx;
6671       mptmp[cp] = PETSC_FALSE;
6672       cp++;
6673     } else {
6674       ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6675       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6676       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6677       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6678       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6679       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6680       mp[cp]->product->api_user = product->api_user;
6681       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6682       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6683       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6684       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6685       rmapt[cp] = 1;
6686       cmapt[cp] = 2;
6687       cmapa[cp] = globidx;
6688       mptmp[cp] = PETSC_FALSE;
6689       cp++;
6690       ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6691       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6692       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6693       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6694       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6695       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6696       mp[cp]->product->api_user = product->api_user;
6697       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6698       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6699       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6700       rmapt[cp] = 2;
6701       rmapa[cp] = p->garray;
6702       cmapt[cp] = 2;
6703       cmapa[cp] = globidx;
6704       mptmp[cp] = PETSC_FALSE;
6705       cp++;
6706     }
6707     break;
6708   case MATPRODUCT_PtAP:
6709     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6710     /* P is product->B */
6711     ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6712     ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6713     ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr);
6714     ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6715     ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6716     ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6717     ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6718     mp[cp]->product->api_user = product->api_user;
6719     ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6720     if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6721     ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6722     ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6723     rmapt[cp] = 2;
6724     rmapa[cp] = globidx;
6725     cmapt[cp] = 2;
6726     cmapa[cp] = globidx;
6727     mptmp[cp] = PETSC_FALSE;
6728     cp++;
6729     if (mmdata->P_oth) {
6730       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr);
6731       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6732       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
6733       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
6734       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
6735       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6736       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6737       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6738       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6739       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6740       mp[cp]->product->api_user = product->api_user;
6741       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6742       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6743       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6744       mptmp[cp] = PETSC_TRUE;
6745       cp++;
6746       ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr);
6747       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6748       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6749       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6750       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6751       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6752       mp[cp]->product->api_user = product->api_user;
6753       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6754       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6755       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6756       rmapt[cp] = 2;
6757       rmapa[cp] = globidx;
6758       cmapt[cp] = 2;
6759       cmapa[cp] = P_oth_idx;
6760       mptmp[cp] = PETSC_FALSE;
6761       cp++;
6762     }
6763     break;
6764   default:
6765     SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6766   }
6767   /* sanity check */
6768   if (size > 1) for (i = 0; i < cp; i++) if (rmapt[i] == 2 && !hasoffproc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %D",i);
6769 
6770   ierr = PetscMalloc2(cp,&mmdata->mp,cp,&mmdata->mptmp);CHKERRQ(ierr);
6771   for (i = 0; i < cp; i++) {
6772     mmdata->mp[i]    = mp[i];
6773     mmdata->mptmp[i] = mptmp[i];
6774   }
6775   mmdata->cp = cp;
6776   C->product->data       = mmdata;
6777   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
6778   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
6779 
6780   /* memory type */
6781   mmdata->mtype = PETSC_MEMTYPE_HOST;
6782   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr);
6783   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr);
6784   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
6785   // enable the line below when MatSeqAIJCopySubArray_SeqAIJKokkos is implemented
6786   //else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE;
6787 
6788   /* prepare coo coordinates for values insertion */
6789 
6790   /* count total nonzeros of those intermediate seqaij Mats
6791     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
6792     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
6793     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
6794   */
6795   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
6796     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
6797     if (mptmp[cp]) continue;
6798     if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
6799       const PetscInt *rmap = rmapa[cp];
6800       const PetscInt mr = mp[cp]->rmap->n;
6801       const PetscInt rs = C->rmap->rstart;
6802       const PetscInt re = C->rmap->rend;
6803       const PetscInt *ii  = mm->i;
6804       for (i = 0; i < mr; i++) {
6805         const PetscInt gr = rmap[i];
6806         const PetscInt nz = ii[i+1] - ii[i];
6807         if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
6808         else ncoo_oown += nz; /* this row is local */
6809       }
6810     } else ncoo_d += mm->nz;
6811   }
6812 
6813   /*
6814     ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc
6815 
6816     ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted on this proc by other procs.
6817 
6818     off[0] points to a big index array, which is shared by off[1,2,...]. Similarly for own[0].
6819 
6820     off[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert to others
6821     own[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert locally
6822     so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.
6823 
6824     coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
6825     E.g. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros this proc will receive.
6826   */
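  /* A hypothetical illustration of the layout above (sizes made up for this sketch): with cp = 2 non-temporary
     products, if mp[0] has 3 nonzeros destined for other procs and 4 inserted locally, while mp[1] has 5 and 2,
     then off[0] points to an index array of length 3+5 = 8 with off[1] = off[0]+3 and off[2] = off[1]+5, and
     own[0] points to an index array of length 4+2 = 6 with own[1] = own[0]+4 and own[2] = own[1]+2. */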
6827   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr); /* +1 to make a csr-like data structure */
6828   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr);
6829 
6830   /* gather (i,j) of nonzeros inserted by remote procs */
6831   if (hasoffproc) {
6832     PetscSF  msf;
6833     PetscInt ncoo2,*coo_i2,*coo_j2;
6834 
6835     ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr);
6836     ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr);
6837     ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr); /* to collect (i,j) of entries to be sent to others */
6838 
6839     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
6840       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
6841       PetscInt   *idxoff = mmdata->off[cp];
6842       PetscInt   *idxown = mmdata->own[cp];
6843       if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
6844         const PetscInt *rmap = rmapa[cp];
6845         const PetscInt *cmap = cmapa[cp];
6846         const PetscInt *ii  = mm->i;
6847         PetscInt       *coi = coo_i + ncoo_o;
6848         PetscInt       *coj = coo_j + ncoo_o;
6849         const PetscInt mr = mp[cp]->rmap->n;
6850         const PetscInt rs = C->rmap->rstart;
6851         const PetscInt re = C->rmap->rend;
6852         const PetscInt cs = C->cmap->rstart;
6853         for (i = 0; i < mr; i++) {
6854           const PetscInt *jj = mm->j + ii[i];
6855           const PetscInt gr  = rmap[i];
6856           const PetscInt nz  = ii[i+1] - ii[i];
6857           if (gr < rs || gr >= re) { /* this is an offproc row */
6858             for (j = ii[i]; j < ii[i+1]; j++) {
6859               *coi++ = gr;
6860               *idxoff++ = j;
6861             }
6862             if (!cmapt[cp]) { /* already global */
6863               for (j = 0; j < nz; j++) *coj++ = jj[j];
6864             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6865               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
6866             } else { /* offdiag */
6867               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
6868             }
6869             ncoo_o += nz;
6870           } else { /* this is a local row */
6871             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
6872           }
6873         }
6874       }
6875       mmdata->off[cp + 1] = idxoff;
6876       mmdata->own[cp + 1] = idxown;
6877     }
6878 
6879     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
6880     ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o/*nleaves*/,NULL/*ilocal*/,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr);
6881     ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr);
6882     ierr = PetscSFGetGraph(msf,&ncoo2/*nroots*/,NULL,NULL,NULL);CHKERRQ(ierr);
6883     ncoo = ncoo_d + ncoo_oown + ncoo2;
6884     ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr);
6885     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr); /* put (i,j) of remote nonzeros at back */
6886     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6887     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6888     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6889     ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
6890     /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
6891     ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr);
6892     coo_i = coo_i2;
6893     coo_j = coo_j2;
6894   } else { /* no offproc values insertion */
6895     ncoo = ncoo_d;
6896     ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr);
6897 
6898     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
6899     ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr);
6900     ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr);
6901   }
6902   mmdata->hasoffproc = hasoffproc;
6903 
6904   /* gather (i,j) of nonzeros inserted locally */
6905   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
6906     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
6907     PetscInt       *coi = coo_i + ncoo_d;
6908     PetscInt       *coj = coo_j + ncoo_d;
6909     const PetscInt *jj  = mm->j;
6910     const PetscInt *ii  = mm->i;
6911     const PetscInt *cmap = cmapa[cp];
6912     const PetscInt *rmap = rmapa[cp];
6913     const PetscInt mr = mp[cp]->rmap->n;
6914     const PetscInt rs = C->rmap->rstart;
6915     const PetscInt re = C->rmap->rend;
6916     const PetscInt cs = C->cmap->rstart;
6917 
6918     if (mptmp[cp]) continue;
6919     if (rmapt[cp] == 1) { /* consecutive rows */
6920       /* fill coo_i */
6921       for (i = 0; i < mr; i++) {
6922         const PetscInt gr = i + rs;
6923         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
6924       }
6925       /* fill coo_j */
6926       if (!cmapt[cp]) { /* type-0, already global */
6927         ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr);
6928       } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
6929         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
6930       } else { /* type-2, local to global for sparse columns */
6931         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
6932       }
6933       ncoo_d += mm->nz;
6934     } else if (rmapt[cp] == 2) { /* sparse rows */
6935       for (i = 0; i < mr; i++) {
6936         const PetscInt *jj = mm->j + ii[i];
6937         const PetscInt gr  = rmap[i];
6938         const PetscInt nz  = ii[i+1] - ii[i];
6939         if (gr >= rs && gr < re) { /* local rows */
6940           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
6941           if (!cmapt[cp]) { /* type-0, already global */
6942             for (j = 0; j < nz; j++) *coj++ = jj[j];
6943           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6944             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
6945           } else { /* type-2, local to global for sparse columns */
6946             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
6947           }
6948           ncoo_d += nz;
6949         }
6950       }
6951     }
6952   }
6953   if (glob) {
6954     ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr);
6955   }
6956   ierr = ISDestroy(&glob);CHKERRQ(ierr);
6957   if (P_oth_l2g) {
6958     ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6959   }
6960   ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr);
6961   /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
6962   ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr);
6963 
6964   /* preallocate with COO data */
6965   ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr);
6966   ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
6967   PetscFunctionReturn(0);
6968 }
6969 
6970 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
6971 {
6972   Mat_Product    *product = mat->product;
6973   PetscErrorCode ierr;
6974 #if defined(PETSC_HAVE_DEVICE)
6975   PetscBool      match = PETSC_FALSE;
6976   PetscBool      usecpu = PETSC_FALSE;
6977 #else
6978   PetscBool      match = PETSC_TRUE;
6979 #endif
6980 
6981   PetscFunctionBegin;
6982   MatCheckProduct(mat,1);
6983 #if defined(PETSC_HAVE_DEVICE)
6984   if (!product->A->boundtocpu && !product->B->boundtocpu) {
6985     ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr);
6986   }
6987   if (match) { /* we can always fallback to the CPU if requested */
6988     switch (product->type) {
6989     case MATPRODUCT_AB:
6990       if (product->api_user) {
6991         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
6992         ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6993         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6994       } else {
6995         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
6996         ierr = PetscOptionsBool("-matproduct_ab_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6997         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6998       }
6999       break;
7000     case MATPRODUCT_AtB:
7001       if (product->api_user) {
7002         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr);
7003         ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7004         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7005       } else {
7006         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr);
7007         ierr = PetscOptionsBool("-matproduct_atb_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7008         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7009       }
7010       break;
7011     case MATPRODUCT_PtAP:
7012       if (product->api_user) {
7013         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
7014         ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7015         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7016       } else {
7017         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
7018         ierr = PetscOptionsBool("-matproduct_ptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
7019         ierr = PetscOptionsEnd();CHKERRQ(ierr);
7020       }
7021       break;
7022     default:
7023       break;
7024     }
7025     match = (PetscBool)!usecpu;
7026   }
7027 #endif
7028   if (match) {
7029     switch (product->type) {
7030     case MATPRODUCT_AB:
7031     case MATPRODUCT_AtB:
7032     case MATPRODUCT_PtAP:
7033       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7034       break;
7035     default:
7036       break;
7037     }
7038   }
7039   /* fallback to MPIAIJ ops */
7040   if (!mat->ops->productsymbolic) {
7041     ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr);
7042   }
7043   PetscFunctionReturn(0);
7044 }
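
/* Illustrative runtime options handled above (the *_backend_cpu options are registered only in device-enabled builds,
   and these spellings apply when the product is created through the user-level API such as MatMatMult(); the
   -matproduct_* forms are used otherwise):
     -matmatmult_backend_cpu           fall back to the CPU MPIAIJ path for C = A*B
     -mattransposematmult_backend_cpu  fall back to the CPU MPIAIJ path for C = At*B
     -matptap_backend_cpu              fall back to the CPU MPIAIJ path for C = Pt*A*P
     -matmatmult_backend_mergeB        merge product->B's local matrices before forming the A_diag product
                                       (handled in MatProductSymbolic_MPIAIJBACKEND()) */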
7045