xref: /petsc/src/mat/impls/aij/mpi/mpiaij.c (revision efa12513287cff49a2b9648ae83199dcbfaad71a)
1 #include <../src/mat/impls/aij/mpi/mpiaij.h>   /*I "petscmat.h" I*/
2 #include <petsc/private/vecimpl.h>
3 #include <petsc/private/sfimpl.h>
4 #include <petsc/private/isimpl.h>
5 #include <petscblaslapack.h>
6 #include <petscsf.h>
7 #include <petsc/private/hashmapi.h>
8 
9 /*MC
10    MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.
11 
12    This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
13    and MATMPIAIJ otherwise.  As a result, for single process communicators,
14   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
15   for communicators controlling multiple processes.  It is recommended that you call both of
16   the above preallocation routines for simplicity.
17 
18    Options Database Keys:
19 . -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()
20 
21   Developer Notes:
22     Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, and MATAIJCRL. This matrix type also
23     automatically switches over to use inode routines when enough inodes exist.
24 
25   Level: beginner
26 
27 .seealso: MatCreateAIJ(), MatCreateSeqAIJ(), MATSEQAIJ, MATMPIAIJ
28 M*/
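
/*
   A minimal usage sketch (illustrative, not part of the original source): create an AIJ matrix and
   call both preallocation routines so that the same code runs on one or many MPI processes. The
   global sizes M, N and the nonzero estimates 5 (diagonal block) and 2 (off-diagonal block) are
   placeholder assumptions for the example only.

     Mat            A;
     PetscErrorCode ierr;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/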
29 
30 /*MC
31    MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.
32 
33    This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
34    and MATMPIAIJCRL otherwise.  As a result, for single process communicators,
35    MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
36   for communicators controlling multiple processes.  It is recommended that you call both of
37   the above preallocation routines for simplicity.
38 
39    Options Database Keys:
40 . -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()
41 
42   Level: beginner
43 
44 .seealso: MatCreateMPIAIJCRL(), MATSEQAIJCRL, MATMPIAIJCRL
45 M*/
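
/*
   A minimal sketch (illustrative only) of selecting this type at runtime from the options database:
   create the matrix, call MatSetFromOptions(), and pass -mat_type aijcrl on the command line.
   Preallocation is done exactly as for MATAIJ; M and N are placeholder sizes.

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     ierr = MatSeqAIJSetPreallocation(A,5,NULL);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);

   and run with:  ./myapp -mat_type aijcrl
*/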
46 
47 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A,PetscBool flg)
48 {
49   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
54   A->boundtocpu = flg;
55 #endif
56   if (a->A) {
57     ierr = MatBindToCPU(a->A,flg);CHKERRQ(ierr);
58   }
59   if (a->B) {
60     ierr = MatBindToCPU(a->B,flg);CHKERRQ(ierr);
61   }
62   PetscFunctionReturn(0);
63 }
64 
65 
66 PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
67 {
68   PetscErrorCode ierr;
69   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)M->data;
70 
71   PetscFunctionBegin;
72   if (mat->A) {
73     ierr = MatSetBlockSizes(mat->A,rbs,cbs);CHKERRQ(ierr);
74     ierr = MatSetBlockSizes(mat->B,rbs,1);CHKERRQ(ierr);
75   }
76   PetscFunctionReturn(0);
77 }
78 
79 PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M,IS *keptrows)
80 {
81   PetscErrorCode  ierr;
82   Mat_MPIAIJ      *mat = (Mat_MPIAIJ*)M->data;
83   Mat_SeqAIJ      *a   = (Mat_SeqAIJ*)mat->A->data;
84   Mat_SeqAIJ      *b   = (Mat_SeqAIJ*)mat->B->data;
85   const PetscInt  *ia,*ib;
86   const MatScalar *aa,*bb,*aav,*bav;
87   PetscInt        na,nb,i,j,*rows,cnt=0,n0rows;
88   PetscInt        m = M->rmap->n,rstart = M->rmap->rstart;
89 
90   PetscFunctionBegin;
91   *keptrows = NULL;
92 
93   ia   = a->i;
94   ib   = b->i;
95   ierr = MatSeqAIJGetArrayRead(mat->A,&aav);CHKERRQ(ierr);
96   ierr = MatSeqAIJGetArrayRead(mat->B,&bav);CHKERRQ(ierr);
97   for (i=0; i<m; i++) {
98     na = ia[i+1] - ia[i];
99     nb = ib[i+1] - ib[i];
100     if (!na && !nb) {
101       cnt++;
102       goto ok1;
103     }
104     aa = aav + ia[i];
105     for (j=0; j<na; j++) {
106       if (aa[j] != 0.0) goto ok1;
107     }
108     bb = bav + ib[i];
109     for (j=0; j <nb; j++) {
110       if (bb[j] != 0.0) goto ok1;
111     }
112     cnt++;
113 ok1:;
114   }
115   ierr = MPIU_Allreduce(&cnt,&n0rows,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)M));CHKERRQ(ierr);
116   if (!n0rows) {
117     ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
118     ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
119     PetscFunctionReturn(0);
120   }
121   ierr = PetscMalloc1(M->rmap->n-cnt,&rows);CHKERRQ(ierr);
122   cnt  = 0;
123   for (i=0; i<m; i++) {
124     na = ia[i+1] - ia[i];
125     nb = ib[i+1] - ib[i];
126     if (!na && !nb) continue;
127     aa = aav + ia[i];
128     for (j=0; j<na;j++) {
129       if (aa[j] != 0.0) {
130         rows[cnt++] = rstart + i;
131         goto ok2;
132       }
133     }
134     bb = bav + ib[i];
135     for (j=0; j<nb; j++) {
136       if (bb[j] != 0.0) {
137         rows[cnt++] = rstart + i;
138         goto ok2;
139       }
140     }
141 ok2:;
142   }
143   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),cnt,rows,PETSC_OWN_POINTER,keptrows);CHKERRQ(ierr);
144   ierr = MatSeqAIJRestoreArrayRead(mat->A,&aav);CHKERRQ(ierr);
145   ierr = MatSeqAIJRestoreArrayRead(mat->B,&bav);CHKERRQ(ierr);
146   PetscFunctionReturn(0);
147 }
148 
149 PetscErrorCode  MatDiagonalSet_MPIAIJ(Mat Y,Vec D,InsertMode is)
150 {
151   PetscErrorCode    ierr;
152   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*) Y->data;
153   PetscBool         cong;
154 
155   PetscFunctionBegin;
156   ierr = MatHasCongruentLayouts(Y,&cong);CHKERRQ(ierr);
157   if (Y->assembled && cong) {
158     ierr = MatDiagonalSet(aij->A,D,is);CHKERRQ(ierr);
159   } else {
160     ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr);
161   }
162   PetscFunctionReturn(0);
163 }
164 
165 PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M,IS *zrows)
166 {
167   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)M->data;
168   PetscErrorCode ierr;
169   PetscInt       i,rstart,nrows,*rows;
170 
171   PetscFunctionBegin;
172   *zrows = NULL;
173   ierr   = MatFindZeroDiagonals_SeqAIJ_Private(aij->A,&nrows,&rows);CHKERRQ(ierr);
174   ierr   = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
175   for (i=0; i<nrows; i++) rows[i] += rstart;
176   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)M),nrows,rows,PETSC_OWN_POINTER,zrows);CHKERRQ(ierr);
177   PetscFunctionReturn(0);
178 }
179 
180 PetscErrorCode MatGetColumnNorms_MPIAIJ(Mat A,NormType type,PetscReal *norms)
181 {
182   PetscErrorCode    ierr;
183   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)A->data;
184   PetscInt          i,n,*garray = aij->garray;
185   Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ*) aij->A->data;
186   Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ*) aij->B->data;
187   PetscReal         *work;
188   const PetscScalar *dummy;
189 
190   PetscFunctionBegin;
191   ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
192   ierr = PetscCalloc1(n,&work);CHKERRQ(ierr);
193   ierr = MatSeqAIJGetArrayRead(aij->A,&dummy);CHKERRQ(ierr);
194   ierr = MatSeqAIJRestoreArrayRead(aij->A,&dummy);CHKERRQ(ierr);
195   ierr = MatSeqAIJGetArrayRead(aij->B,&dummy);CHKERRQ(ierr);
196   ierr = MatSeqAIJRestoreArrayRead(aij->B,&dummy);CHKERRQ(ierr);
197   if (type == NORM_2) {
198     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
199       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]*a_aij->a[i]);
200     }
201     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
202       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]*b_aij->a[i]);
203     }
204   } else if (type == NORM_1) {
205     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
206       work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
207     }
208     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
209       work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
210     }
211   } else if (type == NORM_INFINITY) {
212     for (i=0; i<a_aij->i[aij->A->rmap->n]; i++) {
213       work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
214     }
215     for (i=0; i<b_aij->i[aij->B->rmap->n]; i++) {
216       work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]),work[garray[b_aij->j[i]]]);
217     }
218 
219   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Unknown NormType");
220   if (type == NORM_INFINITY) {
221     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
222   } else {
223     ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
224   }
225   ierr = PetscFree(work);CHKERRQ(ierr);
226   if (type == NORM_2) {
227     for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
228   }
229   PetscFunctionReturn(0);
230 }
231 
232 PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A,IS *is)
233 {
234   Mat_MPIAIJ      *a  = (Mat_MPIAIJ*)A->data;
235   IS              sis,gis;
236   PetscErrorCode  ierr;
237   const PetscInt  *isis,*igis;
238   PetscInt        n,*iis,nsis,ngis,rstart,i;
239 
240   PetscFunctionBegin;
241   ierr = MatFindOffBlockDiagonalEntries(a->A,&sis);CHKERRQ(ierr);
242   ierr = MatFindNonzeroRows(a->B,&gis);CHKERRQ(ierr);
243   ierr = ISGetSize(gis,&ngis);CHKERRQ(ierr);
244   ierr = ISGetSize(sis,&nsis);CHKERRQ(ierr);
245   ierr = ISGetIndices(sis,&isis);CHKERRQ(ierr);
246   ierr = ISGetIndices(gis,&igis);CHKERRQ(ierr);
247 
248   ierr = PetscMalloc1(ngis+nsis,&iis);CHKERRQ(ierr);
249   ierr = PetscArraycpy(iis,igis,ngis);CHKERRQ(ierr);
250   ierr = PetscArraycpy(iis+ngis,isis,nsis);CHKERRQ(ierr);
251   n    = ngis + nsis;
252   ierr = PetscSortRemoveDupsInt(&n,iis);CHKERRQ(ierr);
253   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
254   for (i=0; i<n; i++) iis[i] += rstart;
255   ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),n,iis,PETSC_OWN_POINTER,is);CHKERRQ(ierr);
256 
257   ierr = ISRestoreIndices(sis,&isis);CHKERRQ(ierr);
258   ierr = ISRestoreIndices(gis,&igis);CHKERRQ(ierr);
259   ierr = ISDestroy(&sis);CHKERRQ(ierr);
260   ierr = ISDestroy(&gis);CHKERRQ(ierr);
261   PetscFunctionReturn(0);
262 }
263 
264 /*
265   Local utility routine that creates a mapping from the global column
266   number to the local number in the off-diagonal part of the local
267   storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable,
268   at a slightly higher hash-table lookup cost; without it, it is not scalable
269   (each process stores an order-N integer array), although access is fast.
270 */
271 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
272 {
273   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
274   PetscErrorCode ierr;
275   PetscInt       n = aij->B->cmap->n,i;
276 
277   PetscFunctionBegin;
278   if (!aij->garray) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MPIAIJ Matrix was assembled but is missing garray");
279 #if defined(PETSC_USE_CTABLE)
280   ierr = PetscTableCreate(n,mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
281   for (i=0; i<n; i++) {
282     ierr = PetscTableAdd(aij->colmap,aij->garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
283   }
284 #else
285   ierr = PetscCalloc1(mat->cmap->N+1,&aij->colmap);CHKERRQ(ierr);
286   ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N+1)*sizeof(PetscInt));CHKERRQ(ierr);
287   for (i=0; i<n; i++) aij->colmap[aij->garray[i]] = i+1;
288 #endif
289   PetscFunctionReturn(0);
290 }
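
/*
   A sketch of how the colmap built above is consulted later in this file (gcol stands for a global
   column index; the name is illustrative only). Entries are stored shifted by one so that a lookup
   result of 0 means "not present", hence the decrement:

     PetscInt col;
   #if defined(PETSC_USE_CTABLE)
     ierr = PetscTableFind(aij->colmap,gcol+1,&col);CHKERRQ(ierr);
     col--;
   #else
     col = aij->colmap[gcol] - 1;
   #endif

   A resulting col < 0 means that global column gcol has no entry in the off-diagonal block.
*/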
291 
292 #define MatSetValues_SeqAIJ_A_Private(row,col,value,addv,orow,ocol)     \
293 { \
294     if (col <= lastcol1)  low1 = 0;     \
295     else                 high1 = nrow1; \
296     lastcol1 = col;\
297     while (high1-low1 > 5) { \
298       t = (low1+high1)/2; \
299       if (rp1[t] > col) high1 = t; \
300       else              low1  = t; \
301     } \
302       for (_i=low1; _i<high1; _i++) { \
303         if (rp1[_i] > col) break; \
304         if (rp1[_i] == col) { \
305           if (addv == ADD_VALUES) { \
306             ap1[_i] += value;   \
307             /* Unclear whether PetscLogFlops() will slow down the code */ \
308             (void)PetscLogFlops(1.0);   \
309            } \
310           else                    ap1[_i] = value; \
311           inserted = PETSC_TRUE; \
312           goto a_noinsert; \
313         } \
314       }  \
315       if (value == 0.0 && ignorezeroentries && row != col) {low1 = 0; high1 = nrow1;goto a_noinsert;} \
316       if (nonew == 1) {low1 = 0; high1 = nrow1; goto a_noinsert;}                \
317       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
318       MatSeqXAIJReallocateAIJ(A,am,1,nrow1,row,col,rmax1,aa,ai,aj,rp1,ap1,aimax,nonew,MatScalar); \
319       N = nrow1++ - 1; a->nz++; high1++; \
320       /* shift up all the later entries in this row */ \
321       ierr = PetscArraymove(rp1+_i+1,rp1+_i,N-_i+1);CHKERRQ(ierr);\
322       ierr = PetscArraymove(ap1+_i+1,ap1+_i,N-_i+1);CHKERRQ(ierr);\
323       rp1[_i] = col;  \
324       ap1[_i] = value;  \
325       A->nonzerostate++;\
326       a_noinsert: ; \
327       ailen[row] = nrow1; \
328 }
329 
330 #define MatSetValues_SeqAIJ_B_Private(row,col,value,addv,orow,ocol) \
331   { \
332     if (col <= lastcol2) low2 = 0;                        \
333     else high2 = nrow2;                                   \
334     lastcol2 = col;                                       \
335     while (high2-low2 > 5) {                              \
336       t = (low2+high2)/2;                                 \
337       if (rp2[t] > col) high2 = t;                        \
338       else             low2  = t;                         \
339     }                                                     \
340     for (_i=low2; _i<high2; _i++) {                       \
341       if (rp2[_i] > col) break;                           \
342       if (rp2[_i] == col) {                               \
343         if (addv == ADD_VALUES) {                         \
344           ap2[_i] += value;                               \
345           (void)PetscLogFlops(1.0);                       \
346         }                                                 \
347         else                    ap2[_i] = value;          \
348         inserted = PETSC_TRUE;                            \
349         goto b_noinsert;                                  \
350       }                                                   \
351     }                                                     \
352     if (value == 0.0 && ignorezeroentries) {low2 = 0; high2 = nrow2; goto b_noinsert;} \
353     if (nonew == 1) {low2 = 0; high2 = nrow2; goto b_noinsert;}                        \
354     if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", orow, ocol); \
355     MatSeqXAIJReallocateAIJ(B,bm,1,nrow2,row,col,rmax2,ba,bi,bj,rp2,ap2,bimax,nonew,MatScalar); \
356     N = nrow2++ - 1; b->nz++; high2++;                    \
357     /* shift up all the later entries in this row */      \
358     ierr = PetscArraymove(rp2+_i+1,rp2+_i,N-_i+1);CHKERRQ(ierr);\
359     ierr = PetscArraymove(ap2+_i+1,ap2+_i,N-_i+1);CHKERRQ(ierr);\
360     rp2[_i] = col;                                        \
361     ap2[_i] = value;                                      \
362     B->nonzerostate++;                                    \
363     b_noinsert: ;                                         \
364     bilen[row] = nrow2;                                   \
365   }
366 
367 PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A,PetscInt row,const PetscScalar v[])
368 {
369   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)A->data;
370   Mat_SeqAIJ     *a   = (Mat_SeqAIJ*)mat->A->data,*b = (Mat_SeqAIJ*)mat->B->data;
371   PetscErrorCode ierr;
372   PetscInt       l,*garray = mat->garray,diag;
373 
374   PetscFunctionBegin;
375   /* code only works for square matrices A */
376 
377   /* find size of row to the left of the diagonal part */
378   ierr = MatGetOwnershipRange(A,&diag,NULL);CHKERRQ(ierr);
379   row  = row - diag;
380   for (l=0; l<b->i[row+1]-b->i[row]; l++) {
381     if (garray[b->j[b->i[row]+l]] > diag) break;
382   }
383   ierr = PetscArraycpy(b->a+b->i[row],v,l);CHKERRQ(ierr);
384 
385   /* diagonal part */
386   ierr = PetscArraycpy(a->a+a->i[row],v+l,(a->i[row+1]-a->i[row]));CHKERRQ(ierr);
387 
388   /* right of diagonal part */
389   ierr = PetscArraycpy(b->a+b->i[row]+l,v+l+a->i[row+1]-a->i[row],b->i[row+1]-b->i[row]-l);CHKERRQ(ierr);
390 #if defined(PETSC_HAVE_DEVICE)
391   if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && (l || (a->i[row+1]-a->i[row]) || (b->i[row+1]-b->i[row]-l))) A->offloadmask = PETSC_OFFLOAD_CPU;
392 #endif
393   PetscFunctionReturn(0);
394 }
395 
396 PetscErrorCode MatSetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
397 {
398   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
399   PetscScalar    value = 0.0;
400   PetscErrorCode ierr;
401   PetscInt       i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
402   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
403   PetscBool      roworiented = aij->roworiented;
404 
405   /* Some variables required by the macros below */
406   Mat        A                    = aij->A;
407   Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
408   PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
409   PetscBool  ignorezeroentries    = a->ignorezeroentries;
410   Mat        B                    = aij->B;
411   Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
412   PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
413   MatScalar  *aa,*ba;
414   /* The variable below is only needed for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
415    * cannot use "#if defined" inside a macro. */
416   PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
417 
418   PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
419   PetscInt  nonew;
420   MatScalar *ap1,*ap2;
421 
422   PetscFunctionBegin;
423 #if defined(PETSC_HAVE_DEVICE)
424   if (A->offloadmask == PETSC_OFFLOAD_GPU) {
425     const PetscScalar *dummy;
426     ierr = MatSeqAIJGetArrayRead(A,&dummy);CHKERRQ(ierr);
427     ierr = MatSeqAIJRestoreArrayRead(A,&dummy);CHKERRQ(ierr);
428   }
429   if (B->offloadmask == PETSC_OFFLOAD_GPU) {
430     const PetscScalar *dummy;
431     ierr = MatSeqAIJGetArrayRead(B,&dummy);CHKERRQ(ierr);
432     ierr = MatSeqAIJRestoreArrayRead(B,&dummy);CHKERRQ(ierr);
433   }
434 #endif
435   aa = a->a;
436   ba = b->a;
437   for (i=0; i<m; i++) {
438     if (im[i] < 0) continue;
439     if (PetscUnlikely(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
440     if (im[i] >= rstart && im[i] < rend) {
441       row      = im[i] - rstart;
442       lastcol1 = -1;
443       rp1      = aj + ai[row];
444       ap1      = aa + ai[row];
445       rmax1    = aimax[row];
446       nrow1    = ailen[row];
447       low1     = 0;
448       high1    = nrow1;
449       lastcol2 = -1;
450       rp2      = bj + bi[row];
451       ap2      = ba + bi[row];
452       rmax2    = bimax[row];
453       nrow2    = bilen[row];
454       low2     = 0;
455       high2    = nrow2;
456 
457       for (j=0; j<n; j++) {
458         if (v)  value = roworiented ? v[i*n+j] : v[i+j*m];
459         if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
460         if (in[j] >= cstart && in[j] < cend) {
461           col   = in[j] - cstart;
462           nonew = a->nonew;
463           MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
464 #if defined(PETSC_HAVE_DEVICE)
465           if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
466 #endif
467         } else if (in[j] < 0) continue;
468         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
469         else {
470           if (mat->was_assembled) {
471             if (!aij->colmap) {
472               ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
473             }
474 #if defined(PETSC_USE_CTABLE)
475             ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
476             col--;
477 #else
478             col = aij->colmap[in[j]] - 1;
479 #endif
480             if (col < 0 && !((Mat_SeqAIJ*)(aij->B->data))->nonew) {
481               ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
482               col  =  in[j];
483               /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
484               B        = aij->B;
485               b        = (Mat_SeqAIJ*)B->data;
486               bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j; ba = b->a;
487               rp2      = bj + bi[row];
488               ap2      = ba + bi[row];
489               rmax2    = bimax[row];
490               nrow2    = bilen[row];
491               low2     = 0;
492               high2    = nrow2;
493               bm       = aij->B->rmap->n;
494               ba       = b->a;
495               inserted = PETSC_FALSE;
496             } else if (col < 0) {
497               if (1 == ((Mat_SeqAIJ*)(aij->B->data))->nonew) {
498                 ierr = PetscInfo3(mat,"Skipping insertion of new nonzero location in off-diagonal portion of matrix %g(%D,%D)\n",(double)PetscRealPart(value),im[i],in[j]);CHKERRQ(ierr);
499               } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero at global row/column (%D, %D) into matrix", im[i], in[j]);
500             }
501           } else col = in[j];
502           nonew = b->nonew;
503           MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
504 #if defined(PETSC_HAVE_DEVICE)
505           if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
506 #endif
507         }
508       }
509     } else {
510       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
511       if (!aij->donotstash) {
512         mat->assembled = PETSC_FALSE;
513         if (roworiented) {
514           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
515         } else {
516           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
517         }
518       }
519     }
520   }
521   PetscFunctionReturn(0);
522 }
523 
524 /*
525     This function sets the j and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
526     The values in mat_i have to be sorted and the values in mat_j have to be sorted within each row (CSR-like).
527     No off-process parts of the matrix are allowed here, and mat->was_assembled has to be PETSC_FALSE.
528 */
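
/*
   An illustrative example with made-up numbers: suppose cstart = 4 and cend = 8, and a local row has
   the sorted global columns mat_j = {1, 4, 6, 9}. Columns 4 and 6 fall in [cstart,cend) and go to the
   diagonal block shifted to block-local indices, so aj gets {0, 2} and ailen[row] = 2; columns 1 and 9
   go to the off-diagonal block with their global indices kept at this stage, so bj gets {1, 9} and
   bilen[row] = 2.
*/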
529 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[])
530 {
531   Mat_MPIAIJ     *aij        = (Mat_MPIAIJ*)mat->data;
532   Mat            A           = aij->A; /* diagonal part of the matrix */
533   Mat            B           = aij->B; /* offdiagonal part of the matrix */
534   Mat_SeqAIJ     *a          = (Mat_SeqAIJ*)A->data;
535   Mat_SeqAIJ     *b          = (Mat_SeqAIJ*)B->data;
536   PetscInt       cstart      = mat->cmap->rstart,cend = mat->cmap->rend,col;
537   PetscInt       *ailen      = a->ilen,*aj = a->j;
538   PetscInt       *bilen      = b->ilen,*bj = b->j;
539   PetscInt       am          = aij->A->rmap->n,j;
540   PetscInt       diag_so_far = 0,dnz;
541   PetscInt       offd_so_far = 0,onz;
542 
543   PetscFunctionBegin;
544   /* Iterate over all rows of the matrix */
545   for (j=0; j<am; j++) {
546     dnz = onz = 0;
547     /*  Iterate over all non-zero columns of the current row */
548     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
549       /* If column is in the diagonal */
550       if (mat_j[col] >= cstart && mat_j[col] < cend) {
551         aj[diag_so_far++] = mat_j[col] - cstart;
552         dnz++;
553       } else { /* off-diagonal entries */
554         bj[offd_so_far++] = mat_j[col];
555         onz++;
556       }
557     }
558     ailen[j] = dnz;
559     bilen[j] = onz;
560   }
561   PetscFunctionReturn(0);
562 }
563 
564 /*
565     This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal parts) of an MPIAIJ matrix.
566     The values in mat_i have to be sorted and the values in mat_j have to be sorted within each row (CSR-like).
567     No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
568     Also, mat->was_assembled has to be PETSC_FALSE, otherwise the assignment aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
569     would not be valid and the more complex MatSetValues_MPIAIJ() has to be used.
570 */
571 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat,const PetscInt mat_j[],const PetscInt mat_i[],const PetscScalar mat_a[])
572 {
573   Mat_MPIAIJ     *aij   = (Mat_MPIAIJ*)mat->data;
574   Mat            A      = aij->A; /* diagonal part of the matrix */
575   Mat            B      = aij->B; /* offdiagonal part of the matrix */
576   Mat_SeqAIJ     *aijd  =(Mat_SeqAIJ*)(aij->A)->data,*aijo=(Mat_SeqAIJ*)(aij->B)->data;
577   Mat_SeqAIJ     *a     = (Mat_SeqAIJ*)A->data;
578   Mat_SeqAIJ     *b     = (Mat_SeqAIJ*)B->data;
579   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend;
580   PetscInt       *ailen = a->ilen,*aj = a->j;
581   PetscInt       *bilen = b->ilen,*bj = b->j;
582   PetscInt       am     = aij->A->rmap->n,j;
583   PetscInt       *full_diag_i=aijd->i,*full_offd_i=aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
584   PetscInt       col,dnz_row,onz_row,rowstart_diag,rowstart_offd;
585   PetscScalar    *aa = a->a,*ba = b->a;
586 
587   PetscFunctionBegin;
588   /* Iterate over all rows of the matrix */
589   for (j=0; j<am; j++) {
590     dnz_row = onz_row = 0;
591     rowstart_offd = full_offd_i[j];
592     rowstart_diag = full_diag_i[j];
593     /*  Iterate over all non-zero columns of the current row */
594     for (col=mat_i[j]; col<mat_i[j+1]; col++) {
595       /* If column is in the diagonal */
596       if (mat_j[col] >= cstart && mat_j[col] < cend) {
597         aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
598         aa[rowstart_diag+dnz_row] = mat_a[col];
599         dnz_row++;
600       } else { /* off-diagonal entries */
601         bj[rowstart_offd+onz_row] = mat_j[col];
602         ba[rowstart_offd+onz_row] = mat_a[col];
603         onz_row++;
604       }
605     }
606     ailen[j] = dnz_row;
607     bilen[j] = onz_row;
608   }
609   PetscFunctionReturn(0);
610 }
611 
612 PetscErrorCode MatGetValues_MPIAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
613 {
614   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
615   PetscErrorCode ierr;
616   PetscInt       i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend;
617   PetscInt       cstart = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
618 
619   PetscFunctionBegin;
620   for (i=0; i<m; i++) {
621     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);*/
622     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
623     if (idxm[i] >= rstart && idxm[i] < rend) {
624       row = idxm[i] - rstart;
625       for (j=0; j<n; j++) {
626         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column: %D",idxn[j]); */
627         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
628         if (idxn[j] >= cstart && idxn[j] < cend) {
629           col  = idxn[j] - cstart;
630           ierr = MatGetValues(aij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
631         } else {
632           if (!aij->colmap) {
633             ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
634           }
635 #if defined(PETSC_USE_CTABLE)
636           ierr = PetscTableFind(aij->colmap,idxn[j]+1,&col);CHKERRQ(ierr);
637           col--;
638 #else
639           col = aij->colmap[idxn[j]] - 1;
640 #endif
641           if ((col < 0) || (aij->garray[col] != idxn[j])) *(v+i*n+j) = 0.0;
642           else {
643             ierr = MatGetValues(aij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
644           }
645         }
646       }
647     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
648   }
649   PetscFunctionReturn(0);
650 }
651 
652 PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat,MatAssemblyType mode)
653 {
654   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
655   PetscErrorCode ierr;
656   PetscInt       nstash,reallocs;
657 
658   PetscFunctionBegin;
659   if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
660 
661   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
662   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
663   ierr = PetscInfo2(aij->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
664   PetscFunctionReturn(0);
665 }
666 
667 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat,MatAssemblyType mode)
668 {
669   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
670   PetscErrorCode ierr;
671   PetscMPIInt    n;
672   PetscInt       i,j,rstart,ncols,flg;
673   PetscInt       *row,*col;
674   PetscBool      other_disassembled;
675   PetscScalar    *val;
676 
677   /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */
678 
679   PetscFunctionBegin;
680   if (!aij->donotstash && !mat->nooffprocentries) {
681     while (1) {
682       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
683       if (!flg) break;
684 
685       for (i=0; i<n;) {
686         /* Now identify the consecutive vals belonging to the same row */
687         for (j=i,rstart=row[j]; j<n; j++) {
688           if (row[j] != rstart) break;
689         }
690         if (j < n) ncols = j-i;
691         else       ncols = n-i;
692         /* Now assemble all these values with a single function call */
693         ierr = MatSetValues_MPIAIJ(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr);
694         i    = j;
695       }
696     }
697     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
698   }
699 #if defined(PETSC_HAVE_DEVICE)
700   if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
701   /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
702   if (mat->boundtocpu) {
703     ierr = MatBindToCPU(aij->A,PETSC_TRUE);CHKERRQ(ierr);
704     ierr = MatBindToCPU(aij->B,PETSC_TRUE);CHKERRQ(ierr);
705   }
706 #endif
707   ierr = MatAssemblyBegin(aij->A,mode);CHKERRQ(ierr);
708   ierr = MatAssemblyEnd(aij->A,mode);CHKERRQ(ierr);
709 
710   /* determine if any processor has disassembled; if so, we must
711      also disassemble ourselves, so that we may reassemble. */
712   /*
713      if the nonzero structure of submatrix B cannot change then we know that
714      no processor disassembled, and thus we can skip this step
715   */
716   if (!((Mat_SeqAIJ*)aij->B->data)->nonew) {
717     ierr = MPIU_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
718     if (mat->was_assembled && !other_disassembled) {
719 #if defined(PETSC_HAVE_DEVICE)
720       aij->B->offloadmask = PETSC_OFFLOAD_BOTH; /* do not copy on the GPU when assembling inside MatDisAssemble_MPIAIJ */
721 #endif
722       ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
723     }
724   }
725   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
726     ierr = MatSetUpMultiply_MPIAIJ(mat);CHKERRQ(ierr);
727   }
728   ierr = MatSetOption(aij->B,MAT_USE_INODES,PETSC_FALSE);CHKERRQ(ierr);
729 #if defined(PETSC_HAVE_DEVICE)
730   if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
731 #endif
732   ierr = MatAssemblyBegin(aij->B,mode);CHKERRQ(ierr);
733   ierr = MatAssemblyEnd(aij->B,mode);CHKERRQ(ierr);
734 
735   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
736 
737   aij->rowvalues = NULL;
738 
739   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
740 
741   /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
742   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
743     PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
744     ierr = MPIU_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
745   }
746 #if defined(PETSC_HAVE_DEVICE)
747   mat->offloadmask = PETSC_OFFLOAD_BOTH;
748 #endif
749   PetscFunctionReturn(0);
750 }
751 
752 PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
753 {
754   Mat_MPIAIJ     *l = (Mat_MPIAIJ*)A->data;
755   PetscErrorCode ierr;
756 
757   PetscFunctionBegin;
758   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
759   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
760   PetscFunctionReturn(0);
761 }
762 
763 PetscErrorCode MatZeroRows_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
764 {
765   Mat_MPIAIJ      *mat = (Mat_MPIAIJ *) A->data;
766   PetscObjectState sA, sB;
767   PetscInt        *lrows;
768   PetscInt         r, len;
769   PetscBool        cong, lch, gch;
770   PetscErrorCode   ierr;
771 
772   PetscFunctionBegin;
773   /* get locally owned rows */
774   ierr = MatZeroRowsMapLocal_Private(A,N,rows,&len,&lrows);CHKERRQ(ierr);
775   ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
776   /* fix right hand side if needed */
777   if (x && b) {
778     const PetscScalar *xx;
779     PetscScalar       *bb;
780 
781     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
782     ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr);
783     ierr = VecGetArray(b, &bb);CHKERRQ(ierr);
784     for (r = 0; r < len; ++r) bb[lrows[r]] = diag*xx[lrows[r]];
785     ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr);
786     ierr = VecRestoreArray(b, &bb);CHKERRQ(ierr);
787   }
788 
789   sA = mat->A->nonzerostate;
790   sB = mat->B->nonzerostate;
791 
792   if (diag != 0.0 && cong) {
793     ierr = MatZeroRows(mat->A, len, lrows, diag, NULL, NULL);CHKERRQ(ierr);
794     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
795   } else if (diag != 0.0) { /* non-square or non-congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
796     Mat_SeqAIJ *aijA = (Mat_SeqAIJ*)mat->A->data;
797     Mat_SeqAIJ *aijB = (Mat_SeqAIJ*)mat->B->data;
798     PetscInt   nnwA, nnwB;
799     PetscBool  nnzA, nnzB;
800 
801     nnwA = aijA->nonew;
802     nnwB = aijB->nonew;
803     nnzA = aijA->keepnonzeropattern;
804     nnzB = aijB->keepnonzeropattern;
805     if (!nnzA) {
806       ierr = PetscInfo(mat->A,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n");CHKERRQ(ierr);
807       aijA->nonew = 0;
808     }
809     if (!nnzB) {
810       ierr = PetscInfo(mat->B,"Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n");CHKERRQ(ierr);
811       aijB->nonew = 0;
812     }
813     /* Must zero here before the next loop */
814     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
815     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
816     for (r = 0; r < len; ++r) {
817       const PetscInt row = lrows[r] + A->rmap->rstart;
818       if (row >= A->cmap->N) continue;
819       ierr = MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES);CHKERRQ(ierr);
820     }
821     aijA->nonew = nnwA;
822     aijB->nonew = nnwB;
823   } else {
824     ierr = MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
825     ierr = MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL);CHKERRQ(ierr);
826   }
827   ierr = PetscFree(lrows);CHKERRQ(ierr);
828   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
829   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
830 
831   /* reduce nonzerostate */
832   lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
833   ierr = MPIU_Allreduce(&lch,&gch,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
834   if (gch) A->nonzerostate++;
835   PetscFunctionReturn(0);
836 }
837 
838 PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A,PetscInt N,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
839 {
840   Mat_MPIAIJ        *l = (Mat_MPIAIJ*)A->data;
841   PetscErrorCode    ierr;
842   PetscMPIInt       n = A->rmap->n;
843   PetscInt          i,j,r,m,len = 0;
844   PetscInt          *lrows,*owners = A->rmap->range;
845   PetscMPIInt       p = 0;
846   PetscSFNode       *rrows;
847   PetscSF           sf;
848   const PetscScalar *xx;
849   PetscScalar       *bb,*mask;
850   Vec               xmask,lmask;
851   Mat_SeqAIJ        *aij = (Mat_SeqAIJ*)l->B->data;
852   const PetscInt    *aj, *ii,*ridx;
853   PetscScalar       *aa;
854 
855   PetscFunctionBegin;
856   /* Create SF where leaves are input rows and roots are owned rows */
857   ierr = PetscMalloc1(n, &lrows);CHKERRQ(ierr);
858   for (r = 0; r < n; ++r) lrows[r] = -1;
859   ierr = PetscMalloc1(N, &rrows);CHKERRQ(ierr);
860   for (r = 0; r < N; ++r) {
861     const PetscInt idx   = rows[r];
862     if (idx < 0 || A->rmap->N <= idx) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range [0,%D)",idx,A->rmap->N);
863     if (idx < owners[p] || owners[p+1] <= idx) { /* short-circuit the search if the last p owns this row too */
864       ierr = PetscLayoutFindOwner(A->rmap,idx,&p);CHKERRQ(ierr);
865     }
866     rrows[r].rank  = p;
867     rrows[r].index = rows[r] - owners[p];
868   }
869   ierr = PetscSFCreate(PetscObjectComm((PetscObject) A), &sf);CHKERRQ(ierr);
870   ierr = PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER);CHKERRQ(ierr);
871   /* Collect flags for rows to be zeroed */
872   ierr = PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
873   ierr = PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *) rows, lrows, MPI_LOR);CHKERRQ(ierr);
874   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
875   /* Compress and put in row numbers */
876   for (r = 0; r < n; ++r) if (lrows[r] >= 0) lrows[len++] = r;
877   /* zero diagonal part of matrix */
878   ierr = MatZeroRowsColumns(l->A,len,lrows,diag,x,b);CHKERRQ(ierr);
879   /* handle off diagonal part of matrix */
880   ierr = MatCreateVecs(A,&xmask,NULL);CHKERRQ(ierr);
881   ierr = VecDuplicate(l->lvec,&lmask);CHKERRQ(ierr);
882   ierr = VecGetArray(xmask,&bb);CHKERRQ(ierr);
883   for (i=0; i<len; i++) bb[lrows[i]] = 1;
884   ierr = VecRestoreArray(xmask,&bb);CHKERRQ(ierr);
885   ierr = VecScatterBegin(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886   ierr = VecScatterEnd(l->Mvctx,xmask,lmask,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
887   ierr = VecDestroy(&xmask);CHKERRQ(ierr);
888   if (x && b) { /* this code is buggy when the row and column layouts do not match */
889     PetscBool cong;
890 
891     ierr = MatHasCongruentLayouts(A,&cong);CHKERRQ(ierr);
892     if (!cong) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Need matching row/col layout");
893     ierr = VecScatterBegin(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
894     ierr = VecScatterEnd(l->Mvctx,x,l->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
895     ierr = VecGetArrayRead(l->lvec,&xx);CHKERRQ(ierr);
896     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
897   }
898   ierr = VecGetArray(lmask,&mask);CHKERRQ(ierr);
899   /* remove zeroed rows of the off-diagonal matrix */
900   ii = aij->i;
901   for (i=0; i<len; i++) {
902     ierr = PetscArrayzero(aij->a + ii[lrows[i]],ii[lrows[i]+1] - ii[lrows[i]]);CHKERRQ(ierr);
903   }
904   /* loop over all elements of the off-process part of the matrix, zeroing removed columns */
905   if (aij->compressedrow.use) {
906     m    = aij->compressedrow.nrows;
907     ii   = aij->compressedrow.i;
908     ridx = aij->compressedrow.rindex;
909     for (i=0; i<m; i++) {
910       n  = ii[i+1] - ii[i];
911       aj = aij->j + ii[i];
912       aa = aij->a + ii[i];
913 
914       for (j=0; j<n; j++) {
915         if (PetscAbsScalar(mask[*aj])) {
916           if (b) bb[*ridx] -= *aa*xx[*aj];
917           *aa = 0.0;
918         }
919         aa++;
920         aj++;
921       }
922       ridx++;
923     }
924   } else { /* do not use compressed row format */
925     m = l->B->rmap->n;
926     for (i=0; i<m; i++) {
927       n  = ii[i+1] - ii[i];
928       aj = aij->j + ii[i];
929       aa = aij->a + ii[i];
930       for (j=0; j<n; j++) {
931         if (PetscAbsScalar(mask[*aj])) {
932           if (b) bb[i] -= *aa*xx[*aj];
933           *aa = 0.0;
934         }
935         aa++;
936         aj++;
937       }
938     }
939   }
940   if (x && b) {
941     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
942     ierr = VecRestoreArrayRead(l->lvec,&xx);CHKERRQ(ierr);
943   }
944   ierr = VecRestoreArray(lmask,&mask);CHKERRQ(ierr);
945   ierr = VecDestroy(&lmask);CHKERRQ(ierr);
946   ierr = PetscFree(lrows);CHKERRQ(ierr);
947 
948   /* only change matrix nonzero state if pattern was allowed to be changed */
949   if (!((Mat_SeqAIJ*)(l->A->data))->keepnonzeropattern) {
950     PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
951     ierr = MPIU_Allreduce(&state,&A->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
952   }
953   PetscFunctionReturn(0);
954 }
955 
956 PetscErrorCode MatMult_MPIAIJ(Mat A,Vec xx,Vec yy)
957 {
958   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
959   PetscErrorCode ierr;
960   PetscInt       nt;
961   VecScatter     Mvctx = a->Mvctx;
962 
963   PetscFunctionBegin;
964   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
965   if (nt != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A (%D) and xx (%D)",A->cmap->n,nt);
966   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
967   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
968   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
969   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
970   PetscFunctionReturn(0);
971 }
972 
973 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A,Vec bb,Vec xx)
974 {
975   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
976   PetscErrorCode ierr;
977 
978   PetscFunctionBegin;
979   ierr = MatMultDiagonalBlock(a->A,bb,xx);CHKERRQ(ierr);
980   PetscFunctionReturn(0);
981 }
982 
983 PetscErrorCode MatMultAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
984 {
985   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
986   PetscErrorCode ierr;
987   VecScatter     Mvctx = a->Mvctx;
988 
989   PetscFunctionBegin;
990   ierr = VecScatterBegin(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
991   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
992   ierr = VecScatterEnd(Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
993   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
994   PetscFunctionReturn(0);
995 }
996 
997 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A,Vec xx,Vec yy)
998 {
999   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1000   PetscErrorCode ierr;
1001 
1002   PetscFunctionBegin;
1003   /* do nondiagonal part */
1004   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1005   /* do local part */
1006   ierr = (*a->A->ops->multtranspose)(a->A,xx,yy);CHKERRQ(ierr);
1007   /* add partial results together */
1008   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1009   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1010   PetscFunctionReturn(0);
1011 }
1012 
1013 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat,Mat Bmat,PetscReal tol,PetscBool  *f)
1014 {
1015   MPI_Comm       comm;
1016   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*) Amat->data, *Bij;
1017   Mat            Adia = Aij->A, Bdia, Aoff,Boff,*Aoffs,*Boffs;
1018   IS             Me,Notme;
1019   PetscErrorCode ierr;
1020   PetscInt       M,N,first,last,*notme,i;
1021   PetscBool      lf;
1022   PetscMPIInt    size;
1023 
1024   PetscFunctionBegin;
1025   /* Easy test: check that the diagonal blocks are transposes of each other */
1026   Bij  = (Mat_MPIAIJ*) Bmat->data; Bdia = Bij->A;
1027   ierr = MatIsTranspose(Adia,Bdia,tol,&lf);CHKERRQ(ierr);
1028   ierr = MPIU_Allreduce(&lf,f,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)Amat));CHKERRQ(ierr);
1029   if (!*f) PetscFunctionReturn(0);
1030   ierr = PetscObjectGetComm((PetscObject)Amat,&comm);CHKERRQ(ierr);
1031   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
1032   if (size == 1) PetscFunctionReturn(0);
1033 
1034   /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
1035   ierr = MatGetSize(Amat,&M,&N);CHKERRQ(ierr);
1036   ierr = MatGetOwnershipRange(Amat,&first,&last);CHKERRQ(ierr);
1037   ierr = PetscMalloc1(N-last+first,&notme);CHKERRQ(ierr);
1038   for (i=0; i<first; i++) notme[i] = i;
1039   for (i=last; i<M; i++) notme[i-last+first] = i;
1040   ierr = ISCreateGeneral(MPI_COMM_SELF,N-last+first,notme,PETSC_COPY_VALUES,&Notme);CHKERRQ(ierr);
1041   ierr = ISCreateStride(MPI_COMM_SELF,last-first,first,1,&Me);CHKERRQ(ierr);
1042   ierr = MatCreateSubMatrices(Amat,1,&Me,&Notme,MAT_INITIAL_MATRIX,&Aoffs);CHKERRQ(ierr);
1043   Aoff = Aoffs[0];
1044   ierr = MatCreateSubMatrices(Bmat,1,&Notme,&Me,MAT_INITIAL_MATRIX,&Boffs);CHKERRQ(ierr);
1045   Boff = Boffs[0];
1046   ierr = MatIsTranspose(Aoff,Boff,tol,f);CHKERRQ(ierr);
1047   ierr = MatDestroyMatrices(1,&Aoffs);CHKERRQ(ierr);
1048   ierr = MatDestroyMatrices(1,&Boffs);CHKERRQ(ierr);
1049   ierr = ISDestroy(&Me);CHKERRQ(ierr);
1050   ierr = ISDestroy(&Notme);CHKERRQ(ierr);
1051   ierr = PetscFree(notme);CHKERRQ(ierr);
1052   PetscFunctionReturn(0);
1053 }
1054 
1055 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A,PetscReal tol,PetscBool  *f)
1056 {
1057   PetscErrorCode ierr;
1058 
1059   PetscFunctionBegin;
1060   ierr = MatIsTranspose_MPIAIJ(A,A,tol,f);CHKERRQ(ierr);
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1065 {
1066   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1067   PetscErrorCode ierr;
1068 
1069   PetscFunctionBegin;
1070   /* do nondiagonal part */
1071   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1072   /* do local part */
1073   ierr = (*a->A->ops->multtransposeadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1074   /* add partial results together */
1075   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1076   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
1077   PetscFunctionReturn(0);
1078 }
1079 
1080 /*
1081   This only works correctly for square matrices where the subblock A->A is the
1082    diagonal block
1083 */
1084 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A,Vec v)
1085 {
1086   PetscErrorCode ierr;
1087   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1088 
1089   PetscFunctionBegin;
1090   if (A->rmap->N != A->cmap->N) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block");
1091   if (A->rmap->rstart != A->cmap->rstart || A->rmap->rend != A->cmap->rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"row partition must equal col partition");
1092   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 PetscErrorCode MatScale_MPIAIJ(Mat A,PetscScalar aa)
1097 {
1098   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1099   PetscErrorCode ierr;
1100 
1101   PetscFunctionBegin;
1102   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1103   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
1108 {
1109   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1110   PetscErrorCode ierr;
1111 
1112   PetscFunctionBegin;
1113 #if defined(PETSC_USE_LOG)
1114   PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N);
1115 #endif
1116   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
1117   ierr = VecDestroy(&aij->diag);CHKERRQ(ierr);
1118   ierr = MatDestroy(&aij->A);CHKERRQ(ierr);
1119   ierr = MatDestroy(&aij->B);CHKERRQ(ierr);
1120 #if defined(PETSC_USE_CTABLE)
1121   ierr = PetscTableDestroy(&aij->colmap);CHKERRQ(ierr);
1122 #else
1123   ierr = PetscFree(aij->colmap);CHKERRQ(ierr);
1124 #endif
1125   ierr = PetscFree(aij->garray);CHKERRQ(ierr);
1126   ierr = VecDestroy(&aij->lvec);CHKERRQ(ierr);
1127   ierr = VecScatterDestroy(&aij->Mvctx);CHKERRQ(ierr);
1128   ierr = PetscFree2(aij->rowvalues,aij->rowindices);CHKERRQ(ierr);
1129   ierr = PetscFree(aij->ld);CHKERRQ(ierr);
1130   ierr = PetscFree(mat->data);CHKERRQ(ierr);
1131 
1132   /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
1133   ierr = PetscObjectCompose((PetscObject)mat,"MatMergeSeqsToMPI",NULL);CHKERRQ(ierr);
1134 
1135   ierr = PetscObjectChangeTypeName((PetscObject)mat,NULL);CHKERRQ(ierr);
1136   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1137   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1138   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1139   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1140   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatResetPreallocation_C",NULL);CHKERRQ(ierr);
1141   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1142   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDiagonalScaleLocal_C",NULL);CHKERRQ(ierr);
1143   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpibaij_C",NULL);CHKERRQ(ierr);
1144   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisbaij_C",NULL);CHKERRQ(ierr);
1145 #if defined(PETSC_HAVE_CUDA)
1146   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
1147 #endif
1148 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
1149   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijkokkos_C",NULL);CHKERRQ(ierr);
1150 #endif
1151 #if defined(PETSC_HAVE_ELEMENTAL)
1152   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_elemental_C",NULL);CHKERRQ(ierr);
1153 #endif
1154 #if defined(PETSC_HAVE_SCALAPACK)
1155   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_scalapack_C",NULL);CHKERRQ(ierr);
1156 #endif
1157 #if defined(PETSC_HAVE_HYPRE)
1158   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1159   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1160 #endif
1161   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1162   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_is_mpiaij_C",NULL);CHKERRQ(ierr);
1163   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpiaij_C",NULL);CHKERRQ(ierr);
1164   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIAIJSetUseScalableIncreaseOverlap_C",NULL);CHKERRQ(ierr);
1165   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijperm_C",NULL);CHKERRQ(ierr);
1166   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijsell_C",NULL);CHKERRQ(ierr);
1167 #if defined(PETSC_HAVE_MKL_SPARSE)
1168   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijmkl_C",NULL);CHKERRQ(ierr);
1169 #endif
1170   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpiaijcrl_C",NULL);CHKERRQ(ierr);
1171   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_is_C",NULL);CHKERRQ(ierr);
1172   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpisell_C",NULL);CHKERRQ(ierr);
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat,PetscViewer viewer)
1177 {
1178   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1179   Mat_SeqAIJ        *A   = (Mat_SeqAIJ*)aij->A->data;
1180   Mat_SeqAIJ        *B   = (Mat_SeqAIJ*)aij->B->data;
1181   const PetscInt    *garray = aij->garray;
1182   const PetscScalar *aa,*ba;
1183   PetscInt          header[4],M,N,m,rs,cs,nz,cnt,i,ja,jb;
1184   PetscInt          *rowlens;
1185   PetscInt          *colidxs;
1186   PetscScalar       *matvals;
1187   PetscErrorCode    ierr;
1188 
1189   PetscFunctionBegin;
1190   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1191 
1192   M  = mat->rmap->N;
1193   N  = mat->cmap->N;
1194   m  = mat->rmap->n;
1195   rs = mat->rmap->rstart;
1196   cs = mat->cmap->rstart;
1197   nz = A->nz + B->nz;
1198 
1199   /* write matrix header */
1200   header[0] = MAT_FILE_CLASSID;
1201   header[1] = M; header[2] = N; header[3] = nz;
1202   ierr = MPI_Reduce(&nz,&header[3],1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1203   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1204 
1205   /* fill in and store row lengths  */
1206   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1207   for (i=0; i<m; i++) rowlens[i] = A->i[i+1] - A->i[i] + B->i[i+1] - B->i[i];
1208   ierr = PetscViewerBinaryWriteAll(viewer,rowlens,m,rs,M,PETSC_INT);CHKERRQ(ierr);
1209   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1210 
1211   /* fill in and store column indices */
1212   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1213   for (cnt=0, i=0; i<m; i++) {
1214     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1215       if (garray[B->j[jb]] > cs) break;
1216       colidxs[cnt++] = garray[B->j[jb]];
1217     }
1218     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1219       colidxs[cnt++] = A->j[ja] + cs;
1220     for (; jb<B->i[i+1]; jb++)
1221       colidxs[cnt++] = garray[B->j[jb]];
1222   }
1223   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1224   ierr = PetscViewerBinaryWriteAll(viewer,colidxs,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
1225   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1226 
1227   /* fill in and store nonzero values */
1228   ierr = MatSeqAIJGetArrayRead(aij->A,&aa);CHKERRQ(ierr);
1229   ierr = MatSeqAIJGetArrayRead(aij->B,&ba);CHKERRQ(ierr);
1230   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1231   for (cnt=0, i=0; i<m; i++) {
1232     for (jb=B->i[i]; jb<B->i[i+1]; jb++) {
1233       if (garray[B->j[jb]] > cs) break;
1234       matvals[cnt++] = ba[jb];
1235     }
1236     for (ja=A->i[i]; ja<A->i[i+1]; ja++)
1237       matvals[cnt++] = aa[ja];
1238     for (; jb<B->i[i+1]; jb++)
1239       matvals[cnt++] = ba[jb];
1240   }
1241   ierr = MatSeqAIJRestoreArrayRead(aij->A,&aa);CHKERRQ(ierr);
1242   ierr = MatSeqAIJRestoreArrayRead(aij->B,&ba);CHKERRQ(ierr);
1243   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1244   ierr = PetscViewerBinaryWriteAll(viewer,matvals,nz,PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
1245   ierr = PetscFree(matvals);CHKERRQ(ierr);
1246 
1247   /* write block size option to the viewer's .info file */
1248   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #include <petscdraw.h>
1253 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
1254 {
1255   Mat_MPIAIJ        *aij = (Mat_MPIAIJ*)mat->data;
1256   PetscErrorCode    ierr;
1257   PetscMPIInt       rank = aij->rank,size = aij->size;
1258   PetscBool         isdraw,iascii,isbinary;
1259   PetscViewer       sviewer;
1260   PetscViewerFormat format;
1261 
1262   PetscFunctionBegin;
1263   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1264   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1265   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1266   if (iascii) {
1267     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1268     if (format == PETSC_VIEWER_LOAD_BALANCE) {
1269       PetscInt i,nmax = 0,nmin = PETSC_MAX_INT,navg = 0,*nz,nzlocal = ((Mat_SeqAIJ*) (aij->A->data))->nz + ((Mat_SeqAIJ*) (aij->B->data))->nz;
1270       ierr = PetscMalloc1(size,&nz);CHKERRQ(ierr);
1271       ierr = MPI_Allgather(&nzlocal,1,MPIU_INT,nz,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRMPI(ierr);
1272       for (i=0; i<(PetscInt)size; i++) {
1273         nmax = PetscMax(nmax,nz[i]);
1274         nmin = PetscMin(nmin,nz[i]);
1275         navg += nz[i];
1276       }
1277       ierr = PetscFree(nz);CHKERRQ(ierr);
1278       navg = navg/size;
1279       ierr = PetscViewerASCIIPrintf(viewer,"Load Balance - Nonzeros: Min %D  avg %D  max %D\n",nmin,navg,nmax);CHKERRQ(ierr);
1280       PetscFunctionReturn(0);
1281     }
1282     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1283     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1284       MatInfo   info;
1285       PetscBool inodes;
1286 
1287       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRMPI(ierr);
1288       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
1289       ierr = MatInodeGetInodeSizes(aij->A,NULL,(PetscInt**)&inodes,NULL);CHKERRQ(ierr);
1290       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1291       if (!inodes) {
1292         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, not using I-node routines\n",
1293                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1294       } else {
1295         ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D mem %g, using I-node routines\n",
1296                                                   rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(double)info.memory);CHKERRQ(ierr);
1297       }
1298       ierr = MatGetInfo(aij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
1299       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1300       ierr = MatGetInfo(aij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
1301       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
1302       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1303       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1304       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
1305       ierr = VecScatterView(aij->Mvctx,viewer);CHKERRQ(ierr);
1306       PetscFunctionReturn(0);
1307     } else if (format == PETSC_VIEWER_ASCII_INFO) {
1308       PetscInt inodecount,inodelimit,*inodes;
1309       ierr = MatInodeGetInodeSizes(aij->A,&inodecount,&inodes,&inodelimit);CHKERRQ(ierr);
1310       if (inodes) {
1311         ierr = PetscViewerASCIIPrintf(viewer,"using I-node (on process 0) routines: found %D nodes, limit used is %D\n",inodecount,inodelimit);CHKERRQ(ierr);
1312       } else {
1313         ierr = PetscViewerASCIIPrintf(viewer,"not using I-node (on process 0) routines\n");CHKERRQ(ierr);
1314       }
1315       PetscFunctionReturn(0);
1316     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1317       PetscFunctionReturn(0);
1318     }
1319   } else if (isbinary) {
1320     if (size == 1) {
1321       ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1322       ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1323     } else {
1324       ierr = MatView_MPIAIJ_Binary(mat,viewer);CHKERRQ(ierr);
1325     }
1326     PetscFunctionReturn(0);
1327   } else if (iascii && size == 1) {
1328     ierr = PetscObjectSetName((PetscObject)aij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
1329     ierr = MatView(aij->A,viewer);CHKERRQ(ierr);
1330     PetscFunctionReturn(0);
1331   } else if (isdraw) {
1332     PetscDraw draw;
1333     PetscBool isnull;
1334     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1335     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1336     if (isnull) PetscFunctionReturn(0);
1337   }
1338 
1339   { /* assemble the entire matrix onto first processor */
1340     Mat A = NULL, Av;
1341     IS  isrow,iscol;
1342 
1343     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1344     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1345     ierr = MatCreateSubMatrix(mat,isrow,iscol,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1346     ierr = MatMPIAIJGetSeqAIJ(A,&Av,NULL,NULL);CHKERRQ(ierr);
1347 /* The commented-out code below uses MatCreateSubMatrices() instead */
1348 /*
1349     Mat *AA, A = NULL, Av;
1350     IS  isrow,iscol;
1351 
1352     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->rmap->N : 0,0,1,&isrow);CHKERRQ(ierr);
1353     ierr = ISCreateStride(PetscObjectComm((PetscObject)mat),!rank ? mat->cmap->N : 0,0,1,&iscol);CHKERRQ(ierr);
1354     ierr = MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA);CHKERRQ(ierr);
1355     if (!rank) {
1356        ierr = PetscObjectReference((PetscObject)AA[0]);CHKERRQ(ierr);
1357        A    = AA[0];
1358        Av   = AA[0];
1359     }
1360     ierr = MatDestroySubMatrices(1,&AA);CHKERRQ(ierr);
1361 */
1362     ierr = ISDestroy(&iscol);CHKERRQ(ierr);
1363     ierr = ISDestroy(&isrow);CHKERRQ(ierr);
1364     /*
1365        Every process has to participate in drawing the matrix since the graphics waits are
1366        synchronized across all processes that share the PetscDraw object
1367     */
1368     ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1369     if (!rank) {
1370       if (((PetscObject)mat)->name) {
1371         ierr = PetscObjectSetName((PetscObject)Av,((PetscObject)mat)->name);CHKERRQ(ierr);
1372       }
1373       ierr = MatView_SeqAIJ(Av,sviewer);CHKERRQ(ierr);
1374     }
1375     ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr);
1376     ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1377     ierr = MatDestroy(&A);CHKERRQ(ierr);
1378   }
1379   PetscFunctionReturn(0);
1380 }
1381 
1382 PetscErrorCode MatView_MPIAIJ(Mat mat,PetscViewer viewer)
1383 {
1384   PetscErrorCode ierr;
1385   PetscBool      iascii,isdraw,issocket,isbinary;
1386 
1387   PetscFunctionBegin;
1388   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1389   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1390   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1391   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
1392   if (iascii || isdraw || isbinary || issocket) {
1393     ierr = MatView_MPIAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
1394   }
1395   PetscFunctionReturn(0);
1396 }
1397 
1398 PetscErrorCode MatSOR_MPIAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
1399 {
1400   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1401   PetscErrorCode ierr;
1402   Vec            bb1 = NULL;
1403   PetscBool      hasop;
1404 
1405   PetscFunctionBegin;
1406   if (flag == SOR_APPLY_UPPER) {
1407     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1408     PetscFunctionReturn(0);
1409   }
1410 
1411   if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) {
1412     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
1413   }
1414 
1415   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
1416     if (flag & SOR_ZERO_INITIAL_GUESS) {
1417       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1418       its--;
1419     }
1420 
1421     while (its--) {
1422       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1423       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1424 
1425       /* update rhs: bb1 = bb - B*x */
1426       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1427       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1428 
1429       /* local sweep */
1430       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1431     }
1432   } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
1433     if (flag & SOR_ZERO_INITIAL_GUESS) {
1434       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1435       its--;
1436     }
1437     while (its--) {
1438       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1439       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1440 
1441       /* update rhs: bb1 = bb - B*x */
1442       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1443       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1444 
1445       /* local sweep */
1446       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_FORWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1447     }
1448   } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
1449     if (flag & SOR_ZERO_INITIAL_GUESS) {
1450       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
1451       its--;
1452     }
1453     while (its--) {
1454       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1455       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1456 
1457       /* update rhs: bb1 = bb - B*x */
1458       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
1459       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb,bb1);CHKERRQ(ierr);
1460 
1461       /* local sweep */
1462       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_BACKWARD_SWEEP,fshift,lits,1,xx);CHKERRQ(ierr);
1463     }
1464   } else if (flag & SOR_EISENSTAT) {
1465     Vec xx1;
1466 
1467     ierr = VecDuplicate(bb,&xx1);CHKERRQ(ierr);
1468     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
1469 
1470     ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1471     ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1472     if (!mat->diag) {
1473       ierr = MatCreateVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
1474       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
1475     }
1476     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
1477     if (hasop) {
1478       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
1479     } else {
1480       ierr = VecPointwiseMult(bb1,mat->diag,xx);CHKERRQ(ierr);
1481     }
1482     ierr = VecAYPX(bb1,(omega-2.0)/omega,bb);CHKERRQ(ierr);
1483 
1484     ierr = MatMultAdd(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
1485 
1486     /* local sweep */
1487     ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
1488     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
1489     ierr = VecDestroy(&xx1);CHKERRQ(ierr);
1490   } else SETERRQ(PetscObjectComm((PetscObject)matin),PETSC_ERR_SUP,"Parallel SOR not supported");
1491 
1492   ierr = VecDestroy(&bb1);CHKERRQ(ierr);
1493 
1494   matin->factorerrortype = mat->A->factorerrortype;
1495   PetscFunctionReturn(0);
1496 }
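
/*
   Sketch of the math behind the local sweeps above (no new functionality): with the usual MPIAIJ
   splitting into the diagonal block A_d and the off-diagonal block A_o, each outer iteration
   freezes the ghost values x_o gathered through Mvctx and then solves locally

       bb1 = bb - A_o * x_o
       SOR sweep on A_d with right-hand side bb1  ->  new local x

   i.e. a block Jacobi outer iteration with (forward, backward or symmetric) SOR applied to the
   local diagonal block.
*/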
1497 
1498 PetscErrorCode MatPermute_MPIAIJ(Mat A,IS rowp,IS colp,Mat *B)
1499 {
1500   Mat            aA,aB,Aperm;
1501   const PetscInt *rwant,*cwant,*gcols,*ai,*bi,*aj,*bj;
1502   PetscScalar    *aa,*ba;
1503   PetscInt       i,j,m,n,ng,anz,bnz,*dnnz,*onnz,*tdnnz,*tonnz,*rdest,*cdest,*work,*gcdest;
1504   PetscSF        rowsf,sf;
1505   IS             parcolp = NULL;
1506   PetscBool      done;
1507   PetscErrorCode ierr;
1508 
1509   PetscFunctionBegin;
1510   ierr = MatGetLocalSize(A,&m,&n);CHKERRQ(ierr);
1511   ierr = ISGetIndices(rowp,&rwant);CHKERRQ(ierr);
1512   ierr = ISGetIndices(colp,&cwant);CHKERRQ(ierr);
1513   ierr = PetscMalloc3(PetscMax(m,n),&work,m,&rdest,n,&cdest);CHKERRQ(ierr);
1514 
1515   /* Invert row permutation to find out where my rows should go */
1516   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&rowsf);CHKERRQ(ierr);
1517   ierr = PetscSFSetGraphLayout(rowsf,A->rmap,A->rmap->n,NULL,PETSC_OWN_POINTER,rwant);CHKERRQ(ierr);
1518   ierr = PetscSFSetFromOptions(rowsf);CHKERRQ(ierr);
1519   for (i=0; i<m; i++) work[i] = A->rmap->rstart + i;
1520   ierr = PetscSFReduceBegin(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1521   ierr = PetscSFReduceEnd(rowsf,MPIU_INT,work,rdest,MPIU_REPLACE);CHKERRQ(ierr);
1522 
1523   /* Invert column permutation to find out where my columns should go */
1524   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1525   ierr = PetscSFSetGraphLayout(sf,A->cmap,A->cmap->n,NULL,PETSC_OWN_POINTER,cwant);CHKERRQ(ierr);
1526   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1527   for (i=0; i<n; i++) work[i] = A->cmap->rstart + i;
1528   ierr = PetscSFReduceBegin(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1529   ierr = PetscSFReduceEnd(sf,MPIU_INT,work,cdest,MPIU_REPLACE);CHKERRQ(ierr);
1530   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1531 
1532   ierr = ISRestoreIndices(rowp,&rwant);CHKERRQ(ierr);
1533   ierr = ISRestoreIndices(colp,&cwant);CHKERRQ(ierr);
1534   ierr = MatMPIAIJGetSeqAIJ(A,&aA,&aB,&gcols);CHKERRQ(ierr);
1535 
1536   /* Find out where my gcols should go */
1537   ierr = MatGetSize(aB,NULL,&ng);CHKERRQ(ierr);
1538   ierr = PetscMalloc1(ng,&gcdest);CHKERRQ(ierr);
1539   ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1540   ierr = PetscSFSetGraphLayout(sf,A->cmap,ng,NULL,PETSC_OWN_POINTER,gcols);CHKERRQ(ierr);
1541   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1542   ierr = PetscSFBcastBegin(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1543   ierr = PetscSFBcastEnd(sf,MPIU_INT,cdest,gcdest);CHKERRQ(ierr);
1544   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1545 
1546   ierr = PetscCalloc4(m,&dnnz,m,&onnz,m,&tdnnz,m,&tonnz);CHKERRQ(ierr);
1547   ierr = MatGetRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1548   ierr = MatGetRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1549   for (i=0; i<m; i++) {
1550     PetscInt    row = rdest[i];
1551     PetscMPIInt rowner;
1552     ierr = PetscLayoutFindOwner(A->rmap,row,&rowner);CHKERRQ(ierr);
1553     for (j=ai[i]; j<ai[i+1]; j++) {
1554       PetscInt    col = cdest[aj[j]];
1555       PetscMPIInt cowner;
1556       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr); /* Could build an index for the columns to eliminate this search */
1557       if (rowner == cowner) dnnz[i]++;
1558       else onnz[i]++;
1559     }
1560     for (j=bi[i]; j<bi[i+1]; j++) {
1561       PetscInt    col = gcdest[bj[j]];
1562       PetscMPIInt cowner;
1563       ierr = PetscLayoutFindOwner(A->cmap,col,&cowner);CHKERRQ(ierr);
1564       if (rowner == cowner) dnnz[i]++;
1565       else onnz[i]++;
1566     }
1567   }
1568   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1569   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,dnnz,tdnnz);CHKERRQ(ierr);
1570   ierr = PetscSFBcastBegin(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1571   ierr = PetscSFBcastEnd(rowsf,MPIU_INT,onnz,tonnz);CHKERRQ(ierr);
1572   ierr = PetscSFDestroy(&rowsf);CHKERRQ(ierr);
1573 
1574   ierr = MatCreateAIJ(PetscObjectComm((PetscObject)A),A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N,0,tdnnz,0,tonnz,&Aperm);CHKERRQ(ierr);
1575   ierr = MatSeqAIJGetArray(aA,&aa);CHKERRQ(ierr);
1576   ierr = MatSeqAIJGetArray(aB,&ba);CHKERRQ(ierr);
1577   for (i=0; i<m; i++) {
1578     PetscInt *acols = dnnz,*bcols = onnz; /* Repurpose now-unneeded arrays */
1579     PetscInt j0,rowlen;
1580     rowlen = ai[i+1] - ai[i];
1581     for (j0=j=0; j<rowlen; j0=j) { /* rowlen can exceed m, the length of the scratch arrays, so insert values in batches */
1582       for (; j<PetscMin(rowlen,j0+m); j++) acols[j-j0] = cdest[aj[ai[i]+j]];
1583       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,acols,aa+ai[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1584     }
1585     rowlen = bi[i+1] - bi[i];
1586     for (j0=j=0; j<rowlen; j0=j) {
1587       for (; j<PetscMin(rowlen,j0+m); j++) bcols[j-j0] = gcdest[bj[bi[i]+j]];
1588       ierr = MatSetValues(Aperm,1,&rdest[i],j-j0,bcols,ba+bi[i]+j0,INSERT_VALUES);CHKERRQ(ierr);
1589     }
1590   }
1591   ierr = MatAssemblyBegin(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1592   ierr = MatAssemblyEnd(Aperm,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1593   ierr = MatRestoreRowIJ(aA,0,PETSC_FALSE,PETSC_FALSE,&anz,&ai,&aj,&done);CHKERRQ(ierr);
1594   ierr = MatRestoreRowIJ(aB,0,PETSC_FALSE,PETSC_FALSE,&bnz,&bi,&bj,&done);CHKERRQ(ierr);
1595   ierr = MatSeqAIJRestoreArray(aA,&aa);CHKERRQ(ierr);
1596   ierr = MatSeqAIJRestoreArray(aB,&ba);CHKERRQ(ierr);
1597   ierr = PetscFree4(dnnz,onnz,tdnnz,tonnz);CHKERRQ(ierr);
1598   ierr = PetscFree3(work,rdest,cdest);CHKERRQ(ierr);
1599   ierr = PetscFree(gcdest);CHKERRQ(ierr);
1600   if (parcolp) {ierr = ISDestroy(&colp);CHKERRQ(ierr);}
1601   *B = Aperm;
1602   PetscFunctionReturn(0);
1603 }
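
/*
   A minimal calling sketch for the routine above (illustration only); nlocal, rowidx and colidx
   are hypothetical variables holding this process's portion of the global row and column
   permutations, and ierr is a PetscErrorCode:

     IS  rowp,colp;
     Mat Aperm;

     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),nlocal,rowidx,PETSC_COPY_VALUES,&rowp);CHKERRQ(ierr);
     ierr = ISCreateGeneral(PetscObjectComm((PetscObject)A),nlocal,colidx,PETSC_COPY_VALUES,&colp);CHKERRQ(ierr);
     ierr = MatPermute(A,rowp,colp,&Aperm);CHKERRQ(ierr);
     ierr = ISDestroy(&rowp);CHKERRQ(ierr);
     ierr = ISDestroy(&colp);CHKERRQ(ierr);
*/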
1604 
1605 PetscErrorCode  MatGetGhosts_MPIAIJ(Mat mat,PetscInt *nghosts,const PetscInt *ghosts[])
1606 {
1607   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1608   PetscErrorCode ierr;
1609 
1610   PetscFunctionBegin;
1611   ierr = MatGetSize(aij->B,NULL,nghosts);CHKERRQ(ierr);
1612   if (ghosts) *ghosts = aij->garray;
1613   PetscFunctionReturn(0);
1614 }
1615 
1616 PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1617 {
1618   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1619   Mat            A    = mat->A,B = mat->B;
1620   PetscErrorCode ierr;
1621   PetscLogDouble isend[5],irecv[5];
1622 
1623   PetscFunctionBegin;
1624   info->block_size = 1.0;
1625   ierr             = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1626 
1627   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1628   isend[3] = info->memory;  isend[4] = info->mallocs;
1629 
1630   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1631 
1632   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1633   isend[3] += info->memory;  isend[4] += info->mallocs;
1634   if (flag == MAT_LOCAL) {
1635     info->nz_used      = isend[0];
1636     info->nz_allocated = isend[1];
1637     info->nz_unneeded  = isend[2];
1638     info->memory       = isend[3];
1639     info->mallocs      = isend[4];
1640   } else if (flag == MAT_GLOBAL_MAX) {
1641     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1642 
1643     info->nz_used      = irecv[0];
1644     info->nz_allocated = irecv[1];
1645     info->nz_unneeded  = irecv[2];
1646     info->memory       = irecv[3];
1647     info->mallocs      = irecv[4];
1648   } else if (flag == MAT_GLOBAL_SUM) {
1649     ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1650 
1651     info->nz_used      = irecv[0];
1652     info->nz_allocated = irecv[1];
1653     info->nz_unneeded  = irecv[2];
1654     info->memory       = irecv[3];
1655     info->mallocs      = irecv[4];
1656   }
1657   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1658   info->fill_ratio_needed = 0;
1659   info->factor_mallocs    = 0;
1660   PetscFunctionReturn(0);
1661 }
1662 
1663 PetscErrorCode MatSetOption_MPIAIJ(Mat A,MatOption op,PetscBool flg)
1664 {
1665   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1666   PetscErrorCode ierr;
1667 
1668   PetscFunctionBegin;
1669   switch (op) {
1670   case MAT_NEW_NONZERO_LOCATIONS:
1671   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1672   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1673   case MAT_KEEP_NONZERO_PATTERN:
1674   case MAT_NEW_NONZERO_LOCATION_ERR:
1675   case MAT_USE_INODES:
1676   case MAT_IGNORE_ZERO_ENTRIES:
1677     MatCheckPreallocated(A,1);
1678     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1679     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1680     break;
1681   case MAT_ROW_ORIENTED:
1682     MatCheckPreallocated(A,1);
1683     a->roworiented = flg;
1684 
1685     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1686     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1687     break;
1688   case MAT_FORCE_DIAGONAL_ENTRIES:
1689   case MAT_SORTED_FULL:
1690     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1691     break;
1692   case MAT_IGNORE_OFF_PROC_ENTRIES:
1693     a->donotstash = flg;
1694     break;
1695   /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
1696   case MAT_SPD:
1697   case MAT_SYMMETRIC:
1698   case MAT_STRUCTURALLY_SYMMETRIC:
1699   case MAT_HERMITIAN:
1700   case MAT_SYMMETRY_ETERNAL:
1701     break;
1702   case MAT_SUBMAT_SINGLEIS:
1703     A->submat_singleis = flg;
1704     break;
1705   case MAT_STRUCTURE_ONLY:
1706     /* The option is handled directly by MatSetOption() */
1707     break;
1708   default:
1709     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1710   }
1711   PetscFunctionReturn(0);
1712 }
1713 
1714 PetscErrorCode MatGetRow_MPIAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1715 {
1716   Mat_MPIAIJ     *mat = (Mat_MPIAIJ*)matin->data;
1717   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1718   PetscErrorCode ierr;
1719   PetscInt       i,*cworkA,*cworkB,**pcA,**pcB,cstart = matin->cmap->rstart;
1720   PetscInt       nztot,nzA,nzB,lrow,rstart = matin->rmap->rstart,rend = matin->rmap->rend;
1721   PetscInt       *cmap,*idx_p;
1722 
1723   PetscFunctionBegin;
1724   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1725   mat->getrowactive = PETSC_TRUE;
1726 
1727   if (!mat->rowvalues && (idx || v)) {
1728     /*
1729         allocate enough space to hold information from the longest row.
1730     */
1731     Mat_SeqAIJ *Aa = (Mat_SeqAIJ*)mat->A->data,*Ba = (Mat_SeqAIJ*)mat->B->data;
1732     PetscInt   max = 1,tmp;
1733     for (i=0; i<matin->rmap->n; i++) {
1734       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i];
1735       if (max < tmp) max = tmp;
1736     }
1737     ierr = PetscMalloc2(max,&mat->rowvalues,max,&mat->rowindices);CHKERRQ(ierr);
1738   }
1739 
1740   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1741   lrow = row - rstart;
1742 
1743   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1744   if (!v)   {pvA = NULL; pvB = NULL;}
1745   if (!idx) {pcA = NULL; if (!v) pcB = NULL;}
1746   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1747   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1748   nztot = nzA + nzB;
1749 
1750   cmap = mat->garray;
1751   if (v  || idx) {
1752     if (nztot) {
1753       /* Sort by increasing column numbers, assuming A and B already sorted */
1754       PetscInt imark = -1;
1755       if (v) {
1756         *v = v_p = mat->rowvalues;
1757         for (i=0; i<nzB; i++) {
1758           if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
1759           else break;
1760         }
1761         imark = i;
1762         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1763         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1764       }
1765       if (idx) {
1766         *idx = idx_p = mat->rowindices;
1767         if (imark > -1) {
1768           for (i=0; i<imark; i++) {
1769             idx_p[i] = cmap[cworkB[i]];
1770           }
1771         } else {
1772           for (i=0; i<nzB; i++) {
1773             if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
1774             else break;
1775           }
1776           imark = i;
1777         }
1778         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart + cworkA[i];
1779         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]];
1780       }
1781     } else {
1782       if (idx) *idx = NULL;
1783       if (v)   *v   = NULL;
1784     }
1785   }
1786   *nz  = nztot;
1787   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1788   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1789   PetscFunctionReturn(0);
1790 }
1791 
1792 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1793 {
1794   Mat_MPIAIJ *aij = (Mat_MPIAIJ*)mat->data;
1795 
1796   PetscFunctionBegin;
1797   if (!aij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1798   aij->getrowactive = PETSC_FALSE;
1799   PetscFunctionReturn(0);
1800 }
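
/*
   A minimal calling sketch for the two routines above (illustration only); the requested row must
   be owned by this process and ierr is a PetscErrorCode:

     PetscInt          ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;

     ierr = MatGetRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     ...  cols[] holds the global column indices in increasing order, vals[] the values  ...
     ierr = MatRestoreRow(A,row,&ncols,&cols,&vals);CHKERRQ(ierr);
*/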
1801 
1802 PetscErrorCode MatNorm_MPIAIJ(Mat mat,NormType type,PetscReal *norm)
1803 {
1804   Mat_MPIAIJ     *aij  = (Mat_MPIAIJ*)mat->data;
1805   Mat_SeqAIJ     *amat = (Mat_SeqAIJ*)aij->A->data,*bmat = (Mat_SeqAIJ*)aij->B->data;
1806   PetscErrorCode ierr;
1807   PetscInt       i,j,cstart = mat->cmap->rstart;
1808   PetscReal      sum = 0.0;
1809   MatScalar      *v;
1810 
1811   PetscFunctionBegin;
1812   if (aij->size == 1) {
1813     ierr =  MatNorm(aij->A,type,norm);CHKERRQ(ierr);
1814   } else {
1815     if (type == NORM_FROBENIUS) {
1816       v = amat->a;
1817       for (i=0; i<amat->nz; i++) {
1818         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1819       }
1820       v = bmat->a;
1821       for (i=0; i<bmat->nz; i++) {
1822         sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
1823       }
1824       ierr  = MPIU_Allreduce(&sum,norm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1825       *norm = PetscSqrtReal(*norm);
1826       ierr = PetscLogFlops(2.0*amat->nz+2.0*bmat->nz);CHKERRQ(ierr);
1827     } else if (type == NORM_1) { /* max column norm */
1828       PetscReal *tmp,*tmp2;
1829       PetscInt  *jj,*garray = aij->garray;
1830       ierr  = PetscCalloc1(mat->cmap->N+1,&tmp);CHKERRQ(ierr);
1831       ierr  = PetscMalloc1(mat->cmap->N+1,&tmp2);CHKERRQ(ierr);
1832       *norm = 0.0;
1833       v     = amat->a; jj = amat->j;
1834       for (j=0; j<amat->nz; j++) {
1835         tmp[cstart + *jj++] += PetscAbsScalar(*v);  v++;
1836       }
1837       v = bmat->a; jj = bmat->j;
1838       for (j=0; j<bmat->nz; j++) {
1839         tmp[garray[*jj++]] += PetscAbsScalar(*v); v++;
1840       }
1841       ierr = MPIU_Allreduce(tmp,tmp2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1842       for (j=0; j<mat->cmap->N; j++) {
1843         if (tmp2[j] > *norm) *norm = tmp2[j];
1844       }
1845       ierr = PetscFree(tmp);CHKERRQ(ierr);
1846       ierr = PetscFree(tmp2);CHKERRQ(ierr);
1847       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1848     } else if (type == NORM_INFINITY) { /* max row norm */
1849       PetscReal ntemp = 0.0;
1850       for (j=0; j<aij->A->rmap->n; j++) {
1851         v   = amat->a + amat->i[j];
1852         sum = 0.0;
1853         for (i=0; i<amat->i[j+1]-amat->i[j]; i++) {
1854           sum += PetscAbsScalar(*v); v++;
1855         }
1856         v = bmat->a + bmat->i[j];
1857         for (i=0; i<bmat->i[j+1]-bmat->i[j]; i++) {
1858           sum += PetscAbsScalar(*v); v++;
1859         }
1860         if (sum > ntemp) ntemp = sum;
1861       }
1862       ierr = MPIU_Allreduce(&ntemp,norm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
1863       ierr = PetscLogFlops(PetscMax(amat->nz+bmat->nz-1,0));CHKERRQ(ierr);
1864     } else SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"No support for two norm");
1865   }
1866   PetscFunctionReturn(0);
1867 }
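
/*
   The reductions in MatNorm_MPIAIJ() above compute, in standard notation,

     NORM_FROBENIUS:  ||A||_F   = sqrt( sum_ij |a_ij|^2 )    (local sums of squares, then MPI sum)
     NORM_1:          ||A||_1   = max_j sum_i |a_ij|         (per-column sums, MPI sum, then max)
     NORM_INFINITY:   ||A||_inf = max_i sum_j |a_ij|         (local row sums, then MPI max)
*/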
1868 
1869 PetscErrorCode MatTranspose_MPIAIJ(Mat A,MatReuse reuse,Mat *matout)
1870 {
1871   Mat_MPIAIJ      *a    =(Mat_MPIAIJ*)A->data,*b;
1872   Mat_SeqAIJ      *Aloc =(Mat_SeqAIJ*)a->A->data,*Bloc=(Mat_SeqAIJ*)a->B->data,*sub_B_diag;
1873   PetscInt        M     = A->rmap->N,N=A->cmap->N,ma,na,mb,nb,row,*cols,*cols_tmp,*B_diag_ilen,i,ncol,A_diag_ncol;
1874   const PetscInt  *ai,*aj,*bi,*bj,*B_diag_i;
1875   PetscErrorCode  ierr;
1876   Mat             B,A_diag,*B_diag;
1877   const MatScalar *pbv,*bv;
1878 
1879   PetscFunctionBegin;
1880   ma = A->rmap->n; na = A->cmap->n; mb = a->B->rmap->n; nb = a->B->cmap->n;
1881   ai = Aloc->i; aj = Aloc->j;
1882   bi = Bloc->i; bj = Bloc->j;
1883   if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
1884     PetscInt             *d_nnz,*g_nnz,*o_nnz;
1885     PetscSFNode          *oloc;
1886     PETSC_UNUSED PetscSF sf;
1887 
1888     ierr = PetscMalloc4(na,&d_nnz,na,&o_nnz,nb,&g_nnz,nb,&oloc);CHKERRQ(ierr);
1889     /* compute d_nnz for preallocation */
1890     ierr = PetscArrayzero(d_nnz,na);CHKERRQ(ierr);
1891     for (i=0; i<ai[ma]; i++) {
1892       d_nnz[aj[i]]++;
1893     }
1894     /* compute local off-diagonal contributions */
1895     ierr = PetscArrayzero(g_nnz,nb);CHKERRQ(ierr);
1896     for (i=0; i<bi[ma]; i++) g_nnz[bj[i]]++;
1897     /* map those to global */
1898     ierr = PetscSFCreate(PetscObjectComm((PetscObject)A),&sf);CHKERRQ(ierr);
1899     ierr = PetscSFSetGraphLayout(sf,A->cmap,nb,NULL,PETSC_USE_POINTER,a->garray);CHKERRQ(ierr);
1900     ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
1901     ierr = PetscArrayzero(o_nnz,na);CHKERRQ(ierr);
1902     ierr = PetscSFReduceBegin(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1903     ierr = PetscSFReduceEnd(sf,MPIU_INT,g_nnz,o_nnz,MPIU_SUM);CHKERRQ(ierr);
1904     ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
1905 
1906     ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
1907     ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
1908     ierr = MatSetBlockSizes(B,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1909     ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
1910     ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1911     ierr = PetscFree4(d_nnz,o_nnz,g_nnz,oloc);CHKERRQ(ierr);
1912   } else {
1913     B    = *matout;
1914     ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1915   }
1916 
1917   b           = (Mat_MPIAIJ*)B->data;
1918   A_diag      = a->A;
1919   B_diag      = &b->A;
1920   sub_B_diag  = (Mat_SeqAIJ*)(*B_diag)->data;
1921   A_diag_ncol = A_diag->cmap->N;
1922   B_diag_ilen = sub_B_diag->ilen;
1923   B_diag_i    = sub_B_diag->i;
1924 
1925   /* Set ilen for diagonal of B */
1926   for (i=0; i<A_diag_ncol; i++) {
1927     B_diag_ilen[i] = B_diag_i[i+1] - B_diag_i[i];
1928   }
1929 
1930   /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
1931   very quickly (i.e., without using MatSetValues()) because all writes are local. */
1932   ierr = MatTranspose(A_diag,MAT_REUSE_MATRIX,B_diag);CHKERRQ(ierr);
1933 
1934   /* copy over the B part */
1935   ierr = PetscMalloc1(bi[mb],&cols);CHKERRQ(ierr);
1936   ierr = MatSeqAIJGetArrayRead(a->B,&bv);CHKERRQ(ierr);
1937   pbv  = bv;
1938   row  = A->rmap->rstart;
1939   for (i=0; i<bi[mb]; i++) cols[i] = a->garray[bj[i]];
1940   cols_tmp = cols;
1941   for (i=0; i<mb; i++) {
1942     ncol = bi[i+1]-bi[i];
1943     ierr = MatSetValues(B,ncol,cols_tmp,1,&row,pbv,INSERT_VALUES);CHKERRQ(ierr);
1944     row++;
1945     pbv += ncol; cols_tmp += ncol;
1946   }
1947   ierr = PetscFree(cols);CHKERRQ(ierr);
1948   ierr = MatSeqAIJRestoreArrayRead(a->B,&bv);CHKERRQ(ierr);
1949 
1950   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1951   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1952   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1953     *matout = B;
1954   } else {
1955     ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
1956   }
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat,Vec ll,Vec rr)
1961 {
1962   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
1963   Mat            a    = aij->A,b = aij->B;
1964   PetscErrorCode ierr;
1965   PetscInt       s1,s2,s3;
1966 
1967   PetscFunctionBegin;
1968   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1969   if (rr) {
1970     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1971     if (s1!=s3) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1972     /* Overlap communication with computation. */
1973     ierr = VecScatterBegin(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1974   }
1975   if (ll) {
1976     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1977     if (s1!=s2) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1978     ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1979   }
1980   /* scale the diagonal block */
1981   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1982 
1983   if (rr) {
1984     /* Do a scatter end and then right scale the off-diagonal block */
1985     ierr = VecScatterEnd(aij->Mvctx,rr,aij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1986     ierr = (*b->ops->diagonalscale)(b,NULL,aij->lvec);CHKERRQ(ierr);
1987   }
1988   PetscFunctionReturn(0);
1989 }
1990 
1991 PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
1992 {
1993   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
1994   PetscErrorCode ierr;
1995 
1996   PetscFunctionBegin;
1997   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1998   PetscFunctionReturn(0);
1999 }
2000 
2001 PetscErrorCode MatEqual_MPIAIJ(Mat A,Mat B,PetscBool  *flag)
2002 {
2003   Mat_MPIAIJ     *matB = (Mat_MPIAIJ*)B->data,*matA = (Mat_MPIAIJ*)A->data;
2004   Mat            a,b,c,d;
2005   PetscBool      flg;
2006   PetscErrorCode ierr;
2007 
2008   PetscFunctionBegin;
2009   a = matA->A; b = matA->B;
2010   c = matB->A; d = matB->B;
2011 
2012   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
2013   if (flg) {
2014     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
2015   }
2016   ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2017   PetscFunctionReturn(0);
2018 }
2019 
2020 PetscErrorCode MatCopy_MPIAIJ(Mat A,Mat B,MatStructure str)
2021 {
2022   PetscErrorCode ierr;
2023   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2024   Mat_MPIAIJ     *b = (Mat_MPIAIJ*)B->data;
2025 
2026   PetscFunctionBegin;
2027   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
2028   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
2029     /* because of the column compression in the off-processor part of the matrix a->B,
2030        the number of columns in a->B and b->B may be different, hence we cannot call
2031        the MatCopy() directly on the two parts. If need be, we can provide a more
2032        efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
2033        then copying the submatrices */
2034     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2035   } else {
2036     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
2037     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
2038   }
2039   ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2040   PetscFunctionReturn(0);
2041 }
2042 
2043 PetscErrorCode MatSetUp_MPIAIJ(Mat A)
2044 {
2045   PetscErrorCode ierr;
2046 
2047   PetscFunctionBegin;
2048   ierr = MatMPIAIJSetPreallocation(A,PETSC_DEFAULT,NULL,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2049   PetscFunctionReturn(0);
2050 }
2051 
2052 /*
2053    Computes the number of nonzeros per row needed for preallocation when X and Y
2054    have different nonzero structure.
2055 */
2056 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m,const PetscInt *xi,const PetscInt *xj,const PetscInt *xltog,const PetscInt *yi,const PetscInt *yj,const PetscInt *yltog,PetscInt *nnz)
2057 {
2058   PetscInt       i,j,k,nzx,nzy;
2059 
2060   PetscFunctionBegin;
2061   /* Set the number of nonzeros in the new matrix */
2062   for (i=0; i<m; i++) {
2063     const PetscInt *xjj = xj+xi[i],*yjj = yj+yi[i];
2064     nzx = xi[i+1] - xi[i];
2065     nzy = yi[i+1] - yi[i];
2066     nnz[i] = 0;
2067     for (j=0,k=0; j<nzx; j++) {                   /* Point in X */
2068       for (; k<nzy && yltog[yjj[k]]<xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
2069       if (k<nzy && yltog[yjj[k]]==xltog[xjj[j]]) k++;             /* Skip duplicate */
2070       nnz[i]++;
2071     }
2072     for (; k<nzy; k++) nnz[i]++;
2073   }
2074   PetscFunctionReturn(0);
2075 }
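
/*
   Small worked example of the merge count above (illustrative numbers only): if row i of X has
   global columns {0, 3, 7} and row i of Y has global columns {3, 5}, the merged pattern is
   {0, 3, 5, 7} and nnz[i] = 4; column 3 is counted once because the duplicate is skipped, and
   column 5 is picked up by the catch-up loop while processing X's column 7.
*/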
2076 
2077 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
2078 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y,const PetscInt *yltog,Mat X,const PetscInt *xltog,PetscInt *nnz)
2079 {
2080   PetscErrorCode ierr;
2081   PetscInt       m = Y->rmap->N;
2082   Mat_SeqAIJ     *x = (Mat_SeqAIJ*)X->data;
2083   Mat_SeqAIJ     *y = (Mat_SeqAIJ*)Y->data;
2084 
2085   PetscFunctionBegin;
2086   ierr = MatAXPYGetPreallocation_MPIX_private(m,x->i,x->j,xltog,y->i,y->j,yltog,nnz);CHKERRQ(ierr);
2087   PetscFunctionReturn(0);
2088 }
2089 
2090 PetscErrorCode MatAXPY_MPIAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2091 {
2092   PetscErrorCode ierr;
2093   Mat_MPIAIJ     *xx = (Mat_MPIAIJ*)X->data,*yy = (Mat_MPIAIJ*)Y->data;
2094 
2095   PetscFunctionBegin;
2096   if (str == SAME_NONZERO_PATTERN) {
2097     ierr = MatAXPY(yy->A,a,xx->A,str);CHKERRQ(ierr);
2098     ierr = MatAXPY(yy->B,a,xx->B,str);CHKERRQ(ierr);
2099   } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzero pattern of X is a subset of Y's */
2100     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2101   } else {
2102     Mat      B;
2103     PetscInt *nnz_d,*nnz_o;
2104 
2105     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
2106     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
2107     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2108     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2109     ierr = MatSetLayouts(B,Y->rmap,Y->cmap);CHKERRQ(ierr);
2110     ierr = MatSetType(B,((PetscObject)Y)->type_name);CHKERRQ(ierr);
2111     ierr = MatAXPYGetPreallocation_SeqAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
2112     ierr = MatAXPYGetPreallocation_MPIAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
2113     ierr = MatMPIAIJSetPreallocation(B,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
2114     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2115     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2116     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
2117     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
2118   }
2119   PetscFunctionReturn(0);
2120 }
2121 
2122 extern PetscErrorCode  MatConjugate_SeqAIJ(Mat);
2123 
2124 PetscErrorCode  MatConjugate_MPIAIJ(Mat mat)
2125 {
2126 #if defined(PETSC_USE_COMPLEX)
2127   PetscErrorCode ierr;
2128   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2129 
2130   PetscFunctionBegin;
2131   ierr = MatConjugate_SeqAIJ(aij->A);CHKERRQ(ierr);
2132   ierr = MatConjugate_SeqAIJ(aij->B);CHKERRQ(ierr);
2133 #else
2134   PetscFunctionBegin;
2135 #endif
2136   PetscFunctionReturn(0);
2137 }
2138 
2139 PetscErrorCode MatRealPart_MPIAIJ(Mat A)
2140 {
2141   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2142   PetscErrorCode ierr;
2143 
2144   PetscFunctionBegin;
2145   ierr = MatRealPart(a->A);CHKERRQ(ierr);
2146   ierr = MatRealPart(a->B);CHKERRQ(ierr);
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
2151 {
2152   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2153   PetscErrorCode ierr;
2154 
2155   PetscFunctionBegin;
2156   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
2157   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
2158   PetscFunctionReturn(0);
2159 }
2160 
2161 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2162 {
2163   Mat_MPIAIJ        *a = (Mat_MPIAIJ*)A->data;
2164   PetscErrorCode    ierr;
2165   PetscInt          i,*idxb = NULL,m = A->rmap->n;
2166   PetscScalar       *va,*vv;
2167   Vec               vB,vA;
2168   const PetscScalar *vb;
2169 
2170   PetscFunctionBegin;
2171   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vA);CHKERRQ(ierr);
2172   ierr = MatGetRowMaxAbs(a->A,vA,idx);CHKERRQ(ierr);
2173 
2174   ierr = VecGetArrayWrite(vA,&va);CHKERRQ(ierr);
2175   if (idx) {
2176     for (i=0; i<m; i++) {
2177       if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
2178     }
2179   }
2180 
2181   ierr = VecCreateSeq(PETSC_COMM_SELF,m,&vB);CHKERRQ(ierr);
2182   ierr = PetscMalloc1(m,&idxb);CHKERRQ(ierr);
2183   ierr = MatGetRowMaxAbs(a->B,vB,idxb);CHKERRQ(ierr);
2184 
2185   ierr = VecGetArrayWrite(v,&vv);CHKERRQ(ierr);
2186   ierr = VecGetArrayRead(vB,&vb);CHKERRQ(ierr);
2187   for (i=0; i<m; i++) {
2188     if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
2189       vv[i] = vb[i];
2190       if (idx) idx[i] = a->garray[idxb[i]];
2191     } else {
2192       vv[i] = va[i];
2193       if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]])
2194         idx[i] = a->garray[idxb[i]];
2195     }
2196   }
2197   ierr = VecRestoreArrayWrite(v,&vv);CHKERRQ(ierr);
2198   ierr = VecRestoreArrayWrite(vA,&va);CHKERRQ(ierr);
2199   ierr = VecRestoreArrayRead(vB,&vb);CHKERRQ(ierr);
2200   ierr = PetscFree(idxb);CHKERRQ(ierr);
2201   ierr = VecDestroy(&vA);CHKERRQ(ierr);
2202   ierr = VecDestroy(&vB);CHKERRQ(ierr);
2203   PetscFunctionReturn(0);
2204 }
2205 
2206 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
2207 {
2208   Mat_MPIAIJ        *mat   = (Mat_MPIAIJ*) A->data;
2209   PetscInt          m = A->rmap->n,n = A->cmap->n;
2210   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2211   PetscInt          *cmap  = mat->garray;
2212   PetscInt          *diagIdx, *offdiagIdx;
2213   Vec               diagV, offdiagV;
2214   PetscScalar       *a, *diagA, *offdiagA;
2215   const PetscScalar *ba,*bav;
2216   PetscInt          r,j,col,ncols,*bi,*bj;
2217   PetscErrorCode    ierr;
2218   Mat               B = mat->B;
2219   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2220 
2221   PetscFunctionBegin;
2222   /* When one process holds the entire A and the other processes have no entries */
2223   if (A->cmap->N == n) {
2224     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2225     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2226     ierr = MatGetRowMinAbs(mat->A,diagV,idx);CHKERRQ(ierr);
2227     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2228     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2229     PetscFunctionReturn(0);
2230   } else if (n == 0) {
2231     if (m) {
2232       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2233       for (r = 0; r < m; r++) {a[r] = 0.0; if (idx) idx[r] = -1;}
2234       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2235     }
2236     PetscFunctionReturn(0);
2237   }
2238 
2239   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2240   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2241   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2242   ierr = MatGetRowMinAbs(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2243 
2244   /* Get offdiagIdx[] for implicit 0.0 */
2245   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2246   ba   = bav;
2247   bi   = b->i;
2248   bj   = b->j;
2249   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2250   for (r = 0; r < m; r++) {
2251     ncols = bi[r+1] - bi[r];
2252     if (ncols == A->cmap->N - n) { /* Brow is dense */
2253       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2254     } else { /* Brow is sparse, so we already KNOW the minimum absolute value is 0.0 (from an implicit zero) */
2255       offdiagA[r] = 0.0;
2256 
2257       /* Find first hole in the cmap */
2258       for (j=0; j<ncols; j++) {
2259         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2260         if (col > j && j < cstart) {
2261           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2262           break;
2263         } else if (col > j + n && j >= cstart) {
2264           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2265           break;
2266         }
2267       }
2268       if (j == ncols && ncols < A->cmap->N - n) {
2269         /* a hole is outside compressed Bcols */
2270         if (ncols == 0) {
2271           if (cstart) {
2272             offdiagIdx[r] = 0;
2273           } else offdiagIdx[r] = cend;
2274         } else { /* ncols > 0 */
2275           offdiagIdx[r] = cmap[ncols-1] + 1;
2276           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2277         }
2278       }
2279     }
2280 
2281     for (j=0; j<ncols; j++) {
2282       if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2283       ba++; bj++;
2284     }
2285   }
2286 
2287   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2288   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2289   for (r = 0; r < m; ++r) {
2290     if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
2291       a[r]   = diagA[r];
2292       if (idx) idx[r] = cstart + diagIdx[r];
2293     } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
2294       a[r] = diagA[r];
2295       if (idx) {
2296         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2297           idx[r] = cstart + diagIdx[r];
2298         } else idx[r] = offdiagIdx[r];
2299       }
2300     } else {
2301       a[r]   = offdiagA[r];
2302       if (idx) idx[r] = offdiagIdx[r];
2303     }
2304   }
2305   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2306   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2307   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2308   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2309   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2310   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2311   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2312   PetscFunctionReturn(0);
2313 }
2314 
2315 PetscErrorCode MatGetRowMin_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2316 {
2317   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*) A->data;
2318   PetscInt          m = A->rmap->n,n = A->cmap->n;
2319   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2320   PetscInt          *cmap  = mat->garray;
2321   PetscInt          *diagIdx, *offdiagIdx;
2322   Vec               diagV, offdiagV;
2323   PetscScalar       *a, *diagA, *offdiagA;
2324   const PetscScalar *ba,*bav;
2325   PetscInt          r,j,col,ncols,*bi,*bj;
2326   PetscErrorCode    ierr;
2327   Mat               B = mat->B;
2328   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2329 
2330   PetscFunctionBegin;
2331   /* When one process holds the entire A and the other processes have no entries */
2332   if (A->cmap->N == n) {
2333     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2334     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2335     ierr = MatGetRowMin(mat->A,diagV,idx);CHKERRQ(ierr);
2336     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2337     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2338     PetscFunctionReturn(0);
2339   } else if (n == 0) {
2340     if (m) {
2341       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2342       for (r = 0; r < m; r++) {a[r] = PETSC_MAX_REAL; if (idx) idx[r] = -1;}
2343       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2344     }
2345     PetscFunctionReturn(0);
2346   }
2347 
2348   ierr = PetscCalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2349   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2350   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2351   ierr = MatGetRowMin(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2352 
2353   /* Get offdiagIdx[] for implicit 0.0 */
2354   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2355   ba   = bav;
2356   bi   = b->i;
2357   bj   = b->j;
2358   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2359   for (r = 0; r < m; r++) {
2360     ncols = bi[r+1] - bi[r];
2361     if (ncols == A->cmap->N - n) { /* Brow is dense */
2362       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2363     } else { /* Brow is sparse, so we already KNOW the minimum is 0.0 or lower (from an implicit zero) */
2364       offdiagA[r] = 0.0;
2365 
2366       /* Find first hole in the cmap */
2367       for (j=0; j<ncols; j++) {
2368         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2369         if (col > j && j < cstart) {
2370           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2371           break;
2372         } else if (col > j + n && j >= cstart) {
2373           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2374           break;
2375         }
2376       }
2377       if (j == ncols && ncols < A->cmap->N - n) {
2378         /* a hole is outside compressed Bcols */
2379         if (ncols == 0) {
2380           if (cstart) {
2381             offdiagIdx[r] = 0;
2382           } else offdiagIdx[r] = cend;
2383         } else { /* ncols > 0 */
2384           offdiagIdx[r] = cmap[ncols-1] + 1;
2385           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2386         }
2387       }
2388     }
2389 
2390     for (j=0; j<ncols; j++) {
2391       if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2392       ba++; bj++;
2393     }
2394   }
2395 
2396   ierr = VecGetArrayWrite(v, &a);CHKERRQ(ierr);
2397   ierr = VecGetArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2398   for (r = 0; r < m; ++r) {
2399     if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
2400       a[r]   = diagA[r];
2401       if (idx) idx[r] = cstart + diagIdx[r];
2402     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2403       a[r] = diagA[r];
2404       if (idx) {
2405         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2406           idx[r] = cstart + diagIdx[r];
2407         } else idx[r] = offdiagIdx[r];
2408       }
2409     } else {
2410       a[r]   = offdiagA[r];
2411       if (idx) idx[r] = offdiagIdx[r];
2412     }
2413   }
2414   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2415   ierr = VecRestoreArrayWrite(v, &a);CHKERRQ(ierr);
2416   ierr = VecRestoreArrayRead(diagV, (const PetscScalar**)&diagA);CHKERRQ(ierr);
2417   ierr = VecRestoreArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2418   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2419   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2420   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2421   PetscFunctionReturn(0);
2422 }
2423 
2424 PetscErrorCode MatGetRowMax_MPIAIJ(Mat A,Vec v,PetscInt idx[])
2425 {
2426   Mat_MPIAIJ        *mat = (Mat_MPIAIJ*)A->data;
2427   PetscInt          m = A->rmap->n,n = A->cmap->n;
2428   PetscInt          cstart = A->cmap->rstart,cend = A->cmap->rend;
2429   PetscInt          *cmap  = mat->garray;
2430   PetscInt          *diagIdx, *offdiagIdx;
2431   Vec               diagV, offdiagV;
2432   PetscScalar       *a, *diagA, *offdiagA;
2433   const PetscScalar *ba,*bav;
2434   PetscInt          r,j,col,ncols,*bi,*bj;
2435   PetscErrorCode    ierr;
2436   Mat               B = mat->B;
2437   Mat_SeqAIJ        *b = (Mat_SeqAIJ*)B->data;
2438 
2439   PetscFunctionBegin;
2440   /* When one process holds the entire A and the other processes have no entries */
2441   if (A->cmap->N == n) {
2442     ierr = VecGetArrayWrite(v,&diagA);CHKERRQ(ierr);
2443     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,m,diagA,&diagV);CHKERRQ(ierr);
2444     ierr = MatGetRowMax(mat->A,diagV,idx);CHKERRQ(ierr);
2445     ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2446     ierr = VecRestoreArrayWrite(v,&diagA);CHKERRQ(ierr);
2447     PetscFunctionReturn(0);
2448   } else if (n == 0) {
2449     if (m) {
2450       ierr = VecGetArrayWrite(v,&a);CHKERRQ(ierr);
2451       for (r = 0; r < m; r++) {a[r] = PETSC_MIN_REAL; if (idx) idx[r] = -1;}
2452       ierr = VecRestoreArrayWrite(v,&a);CHKERRQ(ierr);
2453     }
2454     PetscFunctionReturn(0);
2455   }
2456 
2457   ierr = PetscMalloc2(m,&diagIdx,m,&offdiagIdx);CHKERRQ(ierr);
2458   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &diagV);CHKERRQ(ierr);
2459   ierr = VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV);CHKERRQ(ierr);
2460   ierr = MatGetRowMax(mat->A, diagV, diagIdx);CHKERRQ(ierr);
2461 
2462   /* Get offdiagIdx[] for implicit 0.0 */
2463   ierr = MatSeqAIJGetArrayRead(B,&bav);CHKERRQ(ierr);
2464   ba   = bav;
2465   bi   = b->i;
2466   bj   = b->j;
2467   ierr = VecGetArrayWrite(offdiagV, &offdiagA);CHKERRQ(ierr);
2468   for (r = 0; r < m; r++) {
2469     ncols = bi[r+1] - bi[r];
2470     if (ncols == A->cmap->N - n) { /* Brow is dense */
2471       offdiagA[r] = *ba; offdiagIdx[r] = cmap[0];
2472     } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
2473       offdiagA[r] = 0.0;
2474 
2475       /* Find first hole in the cmap */
2476       for (j=0; j<ncols; j++) {
2477         col = cmap[bj[j]]; /* global column number = cmap[B column number] */
2478         if (col > j && j < cstart) {
2479           offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
2480           break;
2481         } else if (col > j + n && j >= cstart) {
2482           offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
2483           break;
2484         }
2485       }
2486       if (j == ncols && ncols < A->cmap->N - n) {
2487         /* a hole is outside compressed Bcols */
2488         if (ncols == 0) {
2489           if (cstart) {
2490             offdiagIdx[r] = 0;
2491           } else offdiagIdx[r] = cend;
2492         } else { /* ncols > 0 */
2493           offdiagIdx[r] = cmap[ncols-1] + 1;
2494           if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
2495         }
2496       }
2497     }
2498 
2499     for (j=0; j<ncols; j++) {
2500       if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {offdiagA[r] = *ba; offdiagIdx[r] = cmap[*bj];}
2501       ba++; bj++;
2502     }
2503   }
2504 
2505   ierr = VecGetArrayWrite(v,    &a);CHKERRQ(ierr);
2506   ierr = VecGetArrayRead(diagV,(const PetscScalar**)&diagA);CHKERRQ(ierr);
2507   for (r = 0; r < m; ++r) {
2508     if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
2509       a[r] = diagA[r];
2510       if (idx) idx[r] = cstart + diagIdx[r];
2511     } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
2512       a[r] = diagA[r];
2513       if (idx) {
2514         if (cstart + diagIdx[r] <= offdiagIdx[r]) {
2515           idx[r] = cstart + diagIdx[r];
2516         } else idx[r] = offdiagIdx[r];
2517       }
2518     } else {
2519       a[r] = offdiagA[r];
2520       if (idx) idx[r] = offdiagIdx[r];
2521     }
2522   }
2523   ierr = MatSeqAIJRestoreArrayRead(B,&bav);CHKERRQ(ierr);
2524   ierr = VecRestoreArrayWrite(v,       &a);CHKERRQ(ierr);
2525   ierr = VecRestoreArrayRead(diagV,   (const PetscScalar**)&diagA);CHKERRQ(ierr);
2526   ierr = VecRestoreArrayWrite(offdiagV,&offdiagA);CHKERRQ(ierr);
2527   ierr = VecDestroy(&diagV);CHKERRQ(ierr);
2528   ierr = VecDestroy(&offdiagV);CHKERRQ(ierr);
2529   ierr = PetscFree2(diagIdx, offdiagIdx);CHKERRQ(ierr);
2530   PetscFunctionReturn(0);
2531 }
2532 
2533 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat,Mat *newmat)
2534 {
2535   PetscErrorCode ierr;
2536   Mat            *dummy;
2537 
2538   PetscFunctionBegin;
2539   ierr    = MatCreateSubMatrix_MPIAIJ_All(mat,MAT_DO_NOT_GET_VALUES,MAT_INITIAL_MATRIX,&dummy);CHKERRQ(ierr);
2540   *newmat = *dummy;
2541   ierr    = PetscFree(dummy);CHKERRQ(ierr);
2542   PetscFunctionReturn(0);
2543 }
2544 
2545 PetscErrorCode  MatInvertBlockDiagonal_MPIAIJ(Mat A,const PetscScalar **values)
2546 {
2547   Mat_MPIAIJ     *a = (Mat_MPIAIJ*) A->data;
2548   PetscErrorCode ierr;
2549 
2550   PetscFunctionBegin;
2551   ierr = MatInvertBlockDiagonal(a->A,values);CHKERRQ(ierr);
2552   A->factorerrortype = a->A->factorerrortype;
2553   PetscFunctionReturn(0);
2554 }
2555 
2556 static PetscErrorCode  MatSetRandom_MPIAIJ(Mat x,PetscRandom rctx)
2557 {
2558   PetscErrorCode ierr;
2559   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)x->data;
2560 
2561   PetscFunctionBegin;
2562   if (!x->assembled && !x->preallocated) SETERRQ(PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
2563   ierr = MatSetRandom(aij->A,rctx);CHKERRQ(ierr);
2564   if (x->assembled) {
2565     ierr = MatSetRandom(aij->B,rctx);CHKERRQ(ierr);
2566   } else {
2567     ierr = MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B,x->cmap->rstart,x->cmap->rend,rctx);CHKERRQ(ierr);
2568   }
2569   ierr = MatAssemblyBegin(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2570   ierr = MatAssemblyEnd(x,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2571   PetscFunctionReturn(0);
2572 }
2573 
2574 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A,PetscBool sc)
2575 {
2576   PetscFunctionBegin;
2577   if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
2578   else A->ops->increaseoverlap    = MatIncreaseOverlap_MPIAIJ;
2579   PetscFunctionReturn(0);
2580 }
2581 
2582 /*@
2583    MatMPIAIJSetUseScalableIncreaseOverlap - Specify whether the matrix uses a scalable algorithm to compute the overlap in MatIncreaseOverlap()
2584 
2585    Collective on Mat
2586 
2587    Input Parameters:
2588 +    A - the matrix
2589 -    sc - PETSC_TRUE indicates that the scalable algorithm should be used (the default is to not use it)
2590 
2591    Level: advanced
2592 
2593 @*/
2594 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A,PetscBool sc)
2595 {
2596   PetscErrorCode       ierr;
2597 
2598   PetscFunctionBegin;
2599   ierr = PetscTryMethod(A,"MatMPIAIJSetUseScalableIncreaseOverlap_C",(Mat,PetscBool),(A,sc));CHKERRQ(ierr);
2600   PetscFunctionReturn(0);
2601 }
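
/*
   A minimal usage sketch (the matrix A and index set array is[] below are
   hypothetical, not taken from a PETSc example): request the scalable overlap
   algorithm before calling MatIncreaseOverlap(),

     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatIncreaseOverlap(A,1,is,2);CHKERRQ(ierr);

   The same behavior can be requested at runtime with the option
   -mat_increase_overlap_scalable, handled in MatSetFromOptions_MPIAIJ() below.
*/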
2602 
2603 PetscErrorCode MatSetFromOptions_MPIAIJ(PetscOptionItems *PetscOptionsObject,Mat A)
2604 {
2605   PetscErrorCode       ierr;
2606   PetscBool            sc = PETSC_FALSE,flg;
2607 
2608   PetscFunctionBegin;
2609   ierr = PetscOptionsHead(PetscOptionsObject,"MPIAIJ options");CHKERRQ(ierr);
2610   if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
2611   ierr = PetscOptionsBool("-mat_increase_overlap_scalable","Use a scalable algorithm to compute the overlap","MatIncreaseOverlap",sc,&sc,&flg);CHKERRQ(ierr);
2612   if (flg) {
2613     ierr = MatMPIAIJSetUseScalableIncreaseOverlap(A,sc);CHKERRQ(ierr);
2614   }
2615   ierr = PetscOptionsTail();CHKERRQ(ierr);
2616   PetscFunctionReturn(0);
2617 }
2618 
2619 PetscErrorCode MatShift_MPIAIJ(Mat Y,PetscScalar a)
2620 {
2621   PetscErrorCode ierr;
2622   Mat_MPIAIJ     *maij = (Mat_MPIAIJ*)Y->data;
2623   Mat_SeqAIJ     *aij = (Mat_SeqAIJ*)maij->A->data;
2624 
2625   PetscFunctionBegin;
2626   if (!Y->preallocated) {
2627     ierr = MatMPIAIJSetPreallocation(Y,1,NULL,0,NULL);CHKERRQ(ierr);
2628   } else if (!aij->nz) {
2629     PetscInt nonew = aij->nonew;
2630     ierr = MatSeqAIJSetPreallocation(maij->A,1,NULL);CHKERRQ(ierr);
2631     aij->nonew = nonew;
2632   }
2633   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2634   PetscFunctionReturn(0);
2635 }
2636 
2637 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A,PetscBool  *missing,PetscInt *d)
2638 {
2639   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2640   PetscErrorCode ierr;
2641 
2642   PetscFunctionBegin;
2643   if (A->rmap->n != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only works for square matrices");
2644   ierr = MatMissingDiagonal(a->A,missing,d);CHKERRQ(ierr);
2645   if (d) {
2646     PetscInt rstart;
2647     ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
2648     *d += rstart;
2649 
2650   }
2651   PetscFunctionReturn(0);
2652 }
2653 
2654 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A,PetscInt nblocks,const PetscInt *bsizes,PetscScalar *diag)
2655 {
2656   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
2657   PetscErrorCode ierr;
2658 
2659   PetscFunctionBegin;
2660   ierr = MatInvertVariableBlockDiagonal(a->A,nblocks,bsizes,diag);CHKERRQ(ierr);
2661   PetscFunctionReturn(0);
2662 }
2663 
2664 /* -------------------------------------------------------------------*/
2665 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
2666                                        MatGetRow_MPIAIJ,
2667                                        MatRestoreRow_MPIAIJ,
2668                                        MatMult_MPIAIJ,
2669                                 /* 4*/ MatMultAdd_MPIAIJ,
2670                                        MatMultTranspose_MPIAIJ,
2671                                        MatMultTransposeAdd_MPIAIJ,
2672                                        NULL,
2673                                        NULL,
2674                                        NULL,
2675                                 /*10*/ NULL,
2676                                        NULL,
2677                                        NULL,
2678                                        MatSOR_MPIAIJ,
2679                                        MatTranspose_MPIAIJ,
2680                                 /*15*/ MatGetInfo_MPIAIJ,
2681                                        MatEqual_MPIAIJ,
2682                                        MatGetDiagonal_MPIAIJ,
2683                                        MatDiagonalScale_MPIAIJ,
2684                                        MatNorm_MPIAIJ,
2685                                 /*20*/ MatAssemblyBegin_MPIAIJ,
2686                                        MatAssemblyEnd_MPIAIJ,
2687                                        MatSetOption_MPIAIJ,
2688                                        MatZeroEntries_MPIAIJ,
2689                                 /*24*/ MatZeroRows_MPIAIJ,
2690                                        NULL,
2691                                        NULL,
2692                                        NULL,
2693                                        NULL,
2694                                 /*29*/ MatSetUp_MPIAIJ,
2695                                        NULL,
2696                                        NULL,
2697                                        MatGetDiagonalBlock_MPIAIJ,
2698                                        NULL,
2699                                 /*34*/ MatDuplicate_MPIAIJ,
2700                                        NULL,
2701                                        NULL,
2702                                        NULL,
2703                                        NULL,
2704                                 /*39*/ MatAXPY_MPIAIJ,
2705                                        MatCreateSubMatrices_MPIAIJ,
2706                                        MatIncreaseOverlap_MPIAIJ,
2707                                        MatGetValues_MPIAIJ,
2708                                        MatCopy_MPIAIJ,
2709                                 /*44*/ MatGetRowMax_MPIAIJ,
2710                                        MatScale_MPIAIJ,
2711                                        MatShift_MPIAIJ,
2712                                        MatDiagonalSet_MPIAIJ,
2713                                        MatZeroRowsColumns_MPIAIJ,
2714                                 /*49*/ MatSetRandom_MPIAIJ,
2715                                        NULL,
2716                                        NULL,
2717                                        NULL,
2718                                        NULL,
2719                                 /*54*/ MatFDColoringCreate_MPIXAIJ,
2720                                        NULL,
2721                                        MatSetUnfactored_MPIAIJ,
2722                                        MatPermute_MPIAIJ,
2723                                        NULL,
2724                                 /*59*/ MatCreateSubMatrix_MPIAIJ,
2725                                        MatDestroy_MPIAIJ,
2726                                        MatView_MPIAIJ,
2727                                        NULL,
2728                                        NULL,
2729                                 /*64*/ NULL,
2730                                        MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
2731                                        NULL,
2732                                        NULL,
2733                                        NULL,
2734                                 /*69*/ MatGetRowMaxAbs_MPIAIJ,
2735                                        MatGetRowMinAbs_MPIAIJ,
2736                                        NULL,
2737                                        NULL,
2738                                        NULL,
2739                                        NULL,
2740                                 /*75*/ MatFDColoringApply_AIJ,
2741                                        MatSetFromOptions_MPIAIJ,
2742                                        NULL,
2743                                        NULL,
2744                                        MatFindZeroDiagonals_MPIAIJ,
2745                                 /*80*/ NULL,
2746                                        NULL,
2747                                        NULL,
2748                                 /*83*/ MatLoad_MPIAIJ,
2749                                        MatIsSymmetric_MPIAIJ,
2750                                        NULL,
2751                                        NULL,
2752                                        NULL,
2753                                        NULL,
2754                                 /*89*/ NULL,
2755                                        NULL,
2756                                        MatMatMultNumeric_MPIAIJ_MPIAIJ,
2757                                        NULL,
2758                                        NULL,
2759                                 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
2760                                        NULL,
2761                                        NULL,
2762                                        NULL,
2763                                        MatBindToCPU_MPIAIJ,
2764                                 /*99*/ MatProductSetFromOptions_MPIAIJ,
2765                                        NULL,
2766                                        NULL,
2767                                        MatConjugate_MPIAIJ,
2768                                        NULL,
2769                                 /*104*/MatSetValuesRow_MPIAIJ,
2770                                        MatRealPart_MPIAIJ,
2771                                        MatImaginaryPart_MPIAIJ,
2772                                        NULL,
2773                                        NULL,
2774                                 /*109*/NULL,
2775                                        NULL,
2776                                        MatGetRowMin_MPIAIJ,
2777                                        NULL,
2778                                        MatMissingDiagonal_MPIAIJ,
2779                                 /*114*/MatGetSeqNonzeroStructure_MPIAIJ,
2780                                        NULL,
2781                                        MatGetGhosts_MPIAIJ,
2782                                        NULL,
2783                                        NULL,
2784                                 /*119*/MatMultDiagonalBlock_MPIAIJ,
2785                                        NULL,
2786                                        NULL,
2787                                        NULL,
2788                                        MatGetMultiProcBlock_MPIAIJ,
2789                                 /*124*/MatFindNonzeroRows_MPIAIJ,
2790                                        MatGetColumnNorms_MPIAIJ,
2791                                        MatInvertBlockDiagonal_MPIAIJ,
2792                                        MatInvertVariableBlockDiagonal_MPIAIJ,
2793                                        MatCreateSubMatricesMPI_MPIAIJ,
2794                                 /*129*/NULL,
2795                                        NULL,
2796                                        NULL,
2797                                        MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
2798                                        NULL,
2799                                 /*134*/NULL,
2800                                        NULL,
2801                                        NULL,
2802                                        NULL,
2803                                        NULL,
2804                                 /*139*/MatSetBlockSizes_MPIAIJ,
2805                                        NULL,
2806                                        NULL,
2807                                        MatFDColoringSetUp_MPIXAIJ,
2808                                        MatFindOffBlockDiagonalEntries_MPIAIJ,
2809                                        MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
2810                                 /*145*/NULL,
2811                                        NULL,
2812                                        NULL
2813 };
2814 
2815 /* ----------------------------------------------------------------------------------------*/
2816 
2817 PetscErrorCode  MatStoreValues_MPIAIJ(Mat mat)
2818 {
2819   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2820   PetscErrorCode ierr;
2821 
2822   PetscFunctionBegin;
2823   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
2824   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
2825   PetscFunctionReturn(0);
2826 }
2827 
2828 PetscErrorCode  MatRetrieveValues_MPIAIJ(Mat mat)
2829 {
2830   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
2831   PetscErrorCode ierr;
2832 
2833   PetscFunctionBegin;
2834   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
2835   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
2836   PetscFunctionReturn(0);
2837 }
2838 
2839 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
2840 {
2841   Mat_MPIAIJ     *b;
2842   PetscErrorCode ierr;
2843   PetscMPIInt    size;
2844 
2845   PetscFunctionBegin;
2846   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2847   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2848   b = (Mat_MPIAIJ*)B->data;
2849 
2850 #if defined(PETSC_USE_CTABLE)
2851   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2852 #else
2853   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2854 #endif
2855   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2856   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2857   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2858 
2859   /* Because B may have been resized, we simply destroy it and create a new one each time */
2860   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
2861   ierr = MatDestroy(&b->B);CHKERRQ(ierr);
2862   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
2863   ierr = MatSetSizes(b->B,B->rmap->n,size > 1 ? B->cmap->N : 0,B->rmap->n,size > 1 ? B->cmap->N : 0);CHKERRQ(ierr);
2864   ierr = MatSetBlockSizesFromMats(b->B,B,B);CHKERRQ(ierr);
2865   ierr = MatSetType(b->B,MATSEQAIJ);CHKERRQ(ierr);
2866   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
2867 
2868   if (!B->preallocated) {
2869     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
2870     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
2871     ierr = MatSetBlockSizesFromMats(b->A,B,B);CHKERRQ(ierr);
2872     ierr = MatSetType(b->A,MATSEQAIJ);CHKERRQ(ierr);
2873     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
2874   }
2875 
2876   ierr = MatSeqAIJSetPreallocation(b->A,d_nz,d_nnz);CHKERRQ(ierr);
2877   ierr = MatSeqAIJSetPreallocation(b->B,o_nz,o_nnz);CHKERRQ(ierr);
2878   B->preallocated  = PETSC_TRUE;
2879   B->was_assembled = PETSC_FALSE;
2880   B->assembled     = PETSC_FALSE;
2881   PetscFunctionReturn(0);
2882 }
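
/*
   A small preallocation sketch (the counts are hypothetical): reserve roughly 5
   nonzeros per row in the diagonal block and 2 per row in the off-diagonal block
   of an MPIAIJ matrix A before setting values,

     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);

   Exact per-row counts can be supplied instead through the d_nnz and o_nnz arrays.
*/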
2883 
2884 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
2885 {
2886   Mat_MPIAIJ     *b;
2887   PetscErrorCode ierr;
2888 
2889   PetscFunctionBegin;
2890   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
2891   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2892   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2893   b = (Mat_MPIAIJ*)B->data;
2894 
2895 #if defined(PETSC_USE_CTABLE)
2896   ierr = PetscTableDestroy(&b->colmap);CHKERRQ(ierr);
2897 #else
2898   ierr = PetscFree(b->colmap);CHKERRQ(ierr);
2899 #endif
2900   ierr = PetscFree(b->garray);CHKERRQ(ierr);
2901   ierr = VecDestroy(&b->lvec);CHKERRQ(ierr);
2902   ierr = VecScatterDestroy(&b->Mvctx);CHKERRQ(ierr);
2903 
2904   ierr = MatResetPreallocation(b->A);CHKERRQ(ierr);
2905   ierr = MatResetPreallocation(b->B);CHKERRQ(ierr);
2906   B->preallocated  = PETSC_TRUE;
2907   B->was_assembled = PETSC_FALSE;
2908   B->assembled = PETSC_FALSE;
2909   PetscFunctionReturn(0);
2910 }
2911 
2912 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2913 {
2914   Mat            mat;
2915   Mat_MPIAIJ     *a,*oldmat = (Mat_MPIAIJ*)matin->data;
2916   PetscErrorCode ierr;
2917 
2918   PetscFunctionBegin;
2919   *newmat = NULL;
2920   ierr    = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2921   ierr    = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2922   ierr    = MatSetBlockSizesFromMats(mat,matin,matin);CHKERRQ(ierr);
2923   ierr    = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2924   a       = (Mat_MPIAIJ*)mat->data;
2925 
2926   mat->factortype   = matin->factortype;
2927   mat->assembled    = matin->assembled;
2928   mat->insertmode   = NOT_SET_VALUES;
2929   mat->preallocated = matin->preallocated;
2930 
2931   a->size         = oldmat->size;
2932   a->rank         = oldmat->rank;
2933   a->donotstash   = oldmat->donotstash;
2934   a->roworiented  = oldmat->roworiented;
2935   a->rowindices   = NULL;
2936   a->rowvalues    = NULL;
2937   a->getrowactive = PETSC_FALSE;
2938 
2939   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2940   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2941 
2942   if (oldmat->colmap) {
2943 #if defined(PETSC_USE_CTABLE)
2944     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2945 #else
2946     ierr = PetscMalloc1(mat->cmap->N,&a->colmap);CHKERRQ(ierr);
2947     ierr = PetscLogObjectMemory((PetscObject)mat,(mat->cmap->N)*sizeof(PetscInt));CHKERRQ(ierr);
2948     ierr = PetscArraycpy(a->colmap,oldmat->colmap,mat->cmap->N);CHKERRQ(ierr);
2949 #endif
2950   } else a->colmap = NULL;
2951   if (oldmat->garray) {
2952     PetscInt len;
2953     len  = oldmat->B->cmap->n;
2954     ierr = PetscMalloc1(len+1,&a->garray);CHKERRQ(ierr);
2955     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2956     if (len) { ierr = PetscArraycpy(a->garray,oldmat->garray,len);CHKERRQ(ierr); }
2957   } else a->garray = NULL;
2958 
2959   /* MatDuplicate() may be called with a non-assembled matrix, since it only
2960      requires the matrix to be preallocated; this can happen, for example,
2961      inside a DMCreateMatrix_Shell */
2962   if (oldmat->lvec) {
2963     ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2964     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2965   }
2966   if (oldmat->Mvctx) {
2967     ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2968     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2969   }
2970   ierr    = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2971   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2972   ierr    = MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2973   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2974   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2975   *newmat = mat;
2976   PetscFunctionReturn(0);
2977 }
2978 
2979 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
2980 {
2981   PetscBool      isbinary, ishdf5;
2982   PetscErrorCode ierr;
2983 
2984   PetscFunctionBegin;
2985   PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
2986   PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
2987   /* force binary viewer to load .info file if it has not yet done so */
2988   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
2989   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
2990   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
2991   if (isbinary) {
2992     ierr = MatLoad_MPIAIJ_Binary(newMat,viewer);CHKERRQ(ierr);
2993   } else if (ishdf5) {
2994 #if defined(PETSC_HAVE_HDF5)
2995     ierr = MatLoad_AIJ_HDF5(newMat,viewer);CHKERRQ(ierr);
2996 #else
2997     SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
2998 #endif
2999   } else {
3000     SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
3001   }
3002   PetscFunctionReturn(0);
3003 }
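
/*
   A minimal loading sketch (the file name "matrix.dat" is illustrative only);
   MatLoad() dispatches to MatLoad_MPIAIJ() above when the matrix type is MATMPIAIJ:

     Mat         A;
     PetscViewer viewer;

     ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatLoad(A,viewer);CHKERRQ(ierr);
     ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr);
*/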
3004 
3005 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
3006 {
3007   PetscInt       header[4],M,N,m,nz,rows,cols,sum,i;
3008   PetscInt       *rowidxs,*colidxs;
3009   PetscScalar    *matvals;
3010   PetscErrorCode ierr;
3011 
3012   PetscFunctionBegin;
3013   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3014 
3015   /* read in matrix header */
3016   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3017   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3018   M  = header[1]; N = header[2]; nz = header[3];
3019   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3020   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3021   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as MPIAIJ");
3022 
3023   /* set block sizes from the viewer's .info file */
3024   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3025   /* set global sizes if not set already */
3026   if (mat->rmap->N < 0) mat->rmap->N = M;
3027   if (mat->cmap->N < 0) mat->cmap->N = N;
3028   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3029   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3030 
3031   /* check if the matrix sizes are correct */
3032   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3033   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3034 
3035   /* read in row lengths and build row indices */
3036   ierr = MatGetLocalSize(mat,&m,NULL);CHKERRQ(ierr);
3037   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3038   ierr = PetscViewerBinaryReadAll(viewer,rowidxs+1,m,PETSC_DECIDE,M,PETSC_INT);CHKERRQ(ierr);
3039   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3040   ierr = MPIU_Allreduce(&rowidxs[m],&sum,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)viewer));CHKERRQ(ierr);
3041   if (sum != nz) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D",nz,sum);
3042   /* read in column indices and matrix values */
3043   ierr = PetscMalloc2(rowidxs[m],&colidxs,rowidxs[m],&matvals);CHKERRQ(ierr);
3044   ierr = PetscViewerBinaryReadAll(viewer,colidxs,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_INT);CHKERRQ(ierr);
3045   ierr = PetscViewerBinaryReadAll(viewer,matvals,rowidxs[m],PETSC_DETERMINE,PETSC_DETERMINE,PETSC_SCALAR);CHKERRQ(ierr);
3046   /* store matrix indices and values */
3047   ierr = MatMPIAIJSetPreallocationCSR(mat,rowidxs,colidxs,matvals);CHKERRQ(ierr);
3048   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3049   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3050   PetscFunctionReturn(0);
3051 }
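
/*
   For orientation, the binary layout consumed above (as inferred from the reads
   performed in MatLoad_MPIAIJ_Binary(); see the PETSc users manual for the
   authoritative description) is

     PetscInt    MAT_FILE_CLASSID
     PetscInt    M               number of global rows
     PetscInt    N               number of global columns
     PetscInt    nz              total number of nonzeros (negative marks a special format)
     PetscInt    rowlens[M]      number of nonzeros in each row
     PetscInt    colidxs[nz]     global column index of each nonzero, row by row
     PetscScalar matvals[nz]     value of each nonzero, in the same order
*/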
3052 
3053 /* Not scalable because of ISAllGather() unless getting all columns. */
3054 PetscErrorCode ISGetSeqIS_Private(Mat mat,IS iscol,IS *isseq)
3055 {
3056   PetscErrorCode ierr;
3057   IS             iscol_local;
3058   PetscBool      isstride;
3059   PetscMPIInt    lisstride=0,gisstride;
3060 
3061   PetscFunctionBegin;
3062   /* Check if we are grabbing all columns */
3063   ierr = PetscObjectTypeCompare((PetscObject)iscol,ISSTRIDE,&isstride);CHKERRQ(ierr);
3064 
3065   if (isstride) {
3066     PetscInt  start,len,mstart,mlen;
3067     ierr = ISStrideGetInfo(iscol,&start,NULL);CHKERRQ(ierr);
3068     ierr = ISGetLocalSize(iscol,&len);CHKERRQ(ierr);
3069     ierr = MatGetOwnershipRangeColumn(mat,&mstart,&mlen);CHKERRQ(ierr);
3070     if (mstart == start && mlen-mstart == len) lisstride = 1;
3071   }
3072 
3073   ierr = MPIU_Allreduce(&lisstride,&gisstride,1,MPI_INT,MPI_MIN,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3074   if (gisstride) {
3075     PetscInt N;
3076     ierr = MatGetSize(mat,NULL,&N);CHKERRQ(ierr);
3077     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iscol_local);CHKERRQ(ierr);
3078     ierr = ISSetIdentity(iscol_local);CHKERRQ(ierr);
3079     ierr = PetscInfo(mat,"Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n");CHKERRQ(ierr);
3080   } else {
3081     PetscInt cbs;
3082     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3083     ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr);
3084     ierr = ISSetBlockSize(iscol_local,cbs);CHKERRQ(ierr);
3085   }
3086 
3087   *isseq = iscol_local;
3088   PetscFunctionReturn(0);
3089 }
3090 
3091 /*
3092  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
3093  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3094 
3095  Input Parameters:
3096    mat - matrix
3097    isrow - parallel row index set; its local indices are a subset of the local rows of mat,
3098            i.e., mat->rstart <= isrow[i] < mat->rend
3099    iscol - parallel column index set; its local indices are a subset of the local columns of mat,
3100            i.e., mat->cstart <= iscol[i] < mat->cend
3101  Output Parameters:
3102    isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
3103    iscol_o - sequential column index set for retrieving mat->B
3104    garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
3105  */
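/*
   Rough illustration with hypothetical values: if iscol_o[4] = 3, i.e. the fifth
   selected off-diagonal column is local column 3 of mat->B, and that column is the
   18th entry of the global iscol, then garray[4] = 17, the column this entry will
   occupy in the submatrix.
*/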
3106 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat,IS isrow,IS iscol,IS *isrow_d,IS *iscol_d,IS *iscol_o,const PetscInt *garray[])
3107 {
3108   PetscErrorCode ierr;
3109   Vec            x,cmap;
3110   const PetscInt *is_idx;
3111   PetscScalar    *xarray,*cmaparray;
3112   PetscInt       ncols,isstart,*idx,m,rstart,*cmap1,count;
3113   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3114   Mat            B=a->B;
3115   Vec            lvec=a->lvec,lcmap;
3116   PetscInt       i,cstart,cend,Bn=B->cmap->N;
3117   MPI_Comm       comm;
3118   VecScatter     Mvctx=a->Mvctx;
3119 
3120   PetscFunctionBegin;
3121   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3122   ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr);
3123 
3124   /* (1) iscol selects a subset of the columns of mat; pad the unselected entries with '-1.' to form a full vector x */
3125   ierr = MatCreateVecs(mat,&x,NULL);CHKERRQ(ierr);
3126   ierr = VecSet(x,-1.0);CHKERRQ(ierr);
3127   ierr = VecDuplicate(x,&cmap);CHKERRQ(ierr);
3128   ierr = VecSet(cmap,-1.0);CHKERRQ(ierr);
3129 
3130   /* Get start indices */
3131   ierr = MPI_Scan(&ncols,&isstart,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3132   isstart -= ncols;
3133   ierr = MatGetOwnershipRangeColumn(mat,&cstart,&cend);CHKERRQ(ierr);
3134 
3135   ierr = ISGetIndices(iscol,&is_idx);CHKERRQ(ierr);
3136   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3137   ierr = VecGetArray(cmap,&cmaparray);CHKERRQ(ierr);
3138   ierr = PetscMalloc1(ncols,&idx);CHKERRQ(ierr);
3139   for (i=0; i<ncols; i++) {
3140     xarray[is_idx[i]-cstart]    = (PetscScalar)is_idx[i];
3141     cmaparray[is_idx[i]-cstart] = i + isstart;      /* global index of iscol[i] */
3142     idx[i]                      = is_idx[i]-cstart; /* local index of iscol[i]  */
3143   }
3144   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
3145   ierr = VecRestoreArray(cmap,&cmaparray);CHKERRQ(ierr);
3146   ierr = ISRestoreIndices(iscol,&is_idx);CHKERRQ(ierr);
3147 
3148   /* Get iscol_d */
3149   ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,iscol_d);CHKERRQ(ierr);
3150   ierr = ISGetBlockSize(iscol,&i);CHKERRQ(ierr);
3151   ierr = ISSetBlockSize(*iscol_d,i);CHKERRQ(ierr);
3152 
3153   /* Get isrow_d */
3154   ierr = ISGetLocalSize(isrow,&m);CHKERRQ(ierr);
3155   rstart = mat->rmap->rstart;
3156   ierr = PetscMalloc1(m,&idx);CHKERRQ(ierr);
3157   ierr = ISGetIndices(isrow,&is_idx);CHKERRQ(ierr);
3158   for (i=0; i<m; i++) idx[i] = is_idx[i]-rstart;
3159   ierr = ISRestoreIndices(isrow,&is_idx);CHKERRQ(ierr);
3160 
3161   ierr = ISCreateGeneral(PETSC_COMM_SELF,m,idx,PETSC_OWN_POINTER,isrow_d);CHKERRQ(ierr);
3162   ierr = ISGetBlockSize(isrow,&i);CHKERRQ(ierr);
3163   ierr = ISSetBlockSize(*isrow_d,i);CHKERRQ(ierr);
3164 
3165   /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
3166   ierr = VecScatterBegin(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3167   ierr = VecScatterEnd(Mvctx,x,lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3168 
3169   ierr = VecDuplicate(lvec,&lcmap);CHKERRQ(ierr);
3170 
3171   ierr = VecScatterBegin(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3172   ierr = VecScatterEnd(Mvctx,cmap,lcmap,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
3173 
3174   /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
3175   /* off-process column indices */
3176   count = 0;
3177   ierr = PetscMalloc1(Bn,&idx);CHKERRQ(ierr);
3178   ierr = PetscMalloc1(Bn,&cmap1);CHKERRQ(ierr);
3179 
3180   ierr = VecGetArray(lvec,&xarray);CHKERRQ(ierr);
3181   ierr = VecGetArray(lcmap,&cmaparray);CHKERRQ(ierr);
3182   for (i=0; i<Bn; i++) {
3183     if (PetscRealPart(xarray[i]) > -1.0) {
3184       idx[count]     = i;                   /* local column index in off-diagonal part B */
3185       cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]);  /* column index in submat */
3186       count++;
3187     }
3188   }
3189   ierr = VecRestoreArray(lvec,&xarray);CHKERRQ(ierr);
3190   ierr = VecRestoreArray(lcmap,&cmaparray);CHKERRQ(ierr);
3191 
3192   ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_COPY_VALUES,iscol_o);CHKERRQ(ierr);
3193   /* cannot ensure iscol_o has same blocksize as iscol! */
3194 
3195   ierr = PetscFree(idx);CHKERRQ(ierr);
3196   *garray = cmap1;
3197 
3198   ierr = VecDestroy(&x);CHKERRQ(ierr);
3199   ierr = VecDestroy(&cmap);CHKERRQ(ierr);
3200   ierr = VecDestroy(&lcmap);CHKERRQ(ierr);
3201   PetscFunctionReturn(0);
3202 }
3203 
3204 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
3205 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *submat)
3206 {
3207   PetscErrorCode ierr;
3208   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)mat->data,*asub;
3209   Mat            M = NULL;
3210   MPI_Comm       comm;
3211   IS             iscol_d,isrow_d,iscol_o;
3212   Mat            Asub = NULL,Bsub = NULL;
3213   PetscInt       n;
3214 
3215   PetscFunctionBegin;
3216   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3217 
3218   if (call == MAT_REUSE_MATRIX) {
3219     /* Retrieve isrow_d, iscol_d and iscol_o from submat */
3220     ierr = PetscObjectQuery((PetscObject)*submat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3221     if (!isrow_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"isrow_d passed in was not used before, cannot reuse");
3222 
3223     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_d",(PetscObject*)&iscol_d);CHKERRQ(ierr);
3224     if (!iscol_d) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_d passed in was not used before, cannot reuse");
3225 
3226     ierr = PetscObjectQuery((PetscObject)*submat,"iscol_o",(PetscObject*)&iscol_o);CHKERRQ(ierr);
3227     if (!iscol_o) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"iscol_o passed in was not used before, cannot reuse");
3228 
3229     /* Update diagonal and off-diagonal portions of submat */
3230     asub = (Mat_MPIAIJ*)(*submat)->data;
3231     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->A);CHKERRQ(ierr);
3232     ierr = ISGetLocalSize(iscol_o,&n);CHKERRQ(ierr);
3233     if (n) {
3234       ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_REUSE_MATRIX,&asub->B);CHKERRQ(ierr);
3235     }
3236     ierr = MatAssemblyBegin(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3237     ierr = MatAssemblyEnd(*submat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3238 
3239   } else { /* call == MAT_INITIAL_MATRIX */
3240     const PetscInt *garray;
3241     PetscInt        BsubN;
3242 
3243     /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
3244     ierr = ISGetSeqIS_SameColDist_Private(mat,isrow,iscol,&isrow_d,&iscol_d,&iscol_o,&garray);CHKERRQ(ierr);
3245 
3246     /* Create local submatrices Asub and Bsub */
3247     ierr = MatCreateSubMatrix_SeqAIJ(a->A,isrow_d,iscol_d,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Asub);CHKERRQ(ierr);
3248     ierr = MatCreateSubMatrix_SeqAIJ(a->B,isrow_d,iscol_o,PETSC_DECIDE,MAT_INITIAL_MATRIX,&Bsub);CHKERRQ(ierr);
3249 
3250     /* Create submatrix M */
3251     ierr = MatCreateMPIAIJWithSeqAIJ(comm,Asub,Bsub,garray,&M);CHKERRQ(ierr);
3252 
3253     /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
3254     asub = (Mat_MPIAIJ*)M->data;
3255 
3256     ierr = ISGetLocalSize(iscol_o,&BsubN);CHKERRQ(ierr);
3257     n = asub->B->cmap->N;
3258     if (BsubN > n) {
3259       /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
3260       const PetscInt *idx;
3261       PetscInt       i,j,*idx_new,*subgarray = asub->garray;
3262       ierr = PetscInfo2(M,"submatrix Bn %D != BsubN %D, update iscol_o\n",n,BsubN);CHKERRQ(ierr);
3263 
3264       ierr = PetscMalloc1(n,&idx_new);CHKERRQ(ierr);
3265       j = 0;
3266       ierr = ISGetIndices(iscol_o,&idx);CHKERRQ(ierr);
3267       for (i=0; i<n; i++) {
3268         if (j >= BsubN) break;
3269         while (subgarray[i] > garray[j]) j++;
3270 
3271         if (subgarray[i] == garray[j]) {
3272           idx_new[i] = idx[j++];
3273         } else SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"subgarray[%D]=%D cannot be smaller than garray[%D]=%D",i,subgarray[i],j,garray[j]);
3274       }
3275       ierr = ISRestoreIndices(iscol_o,&idx);CHKERRQ(ierr);
3276 
3277       ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3278       ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idx_new,PETSC_OWN_POINTER,&iscol_o);CHKERRQ(ierr);
3279 
3280     } else if (BsubN < n) {
3281       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Number of columns of Bsub (%D) cannot be smaller than that of B (%D)",BsubN,asub->B->cmap->N);
3282     }
3283 
3284     ierr = PetscFree(garray);CHKERRQ(ierr);
3285     *submat = M;
3286 
3287     /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
3288     ierr = PetscObjectCompose((PetscObject)M,"isrow_d",(PetscObject)isrow_d);CHKERRQ(ierr);
3289     ierr = ISDestroy(&isrow_d);CHKERRQ(ierr);
3290 
3291     ierr = PetscObjectCompose((PetscObject)M,"iscol_d",(PetscObject)iscol_d);CHKERRQ(ierr);
3292     ierr = ISDestroy(&iscol_d);CHKERRQ(ierr);
3293 
3294     ierr = PetscObjectCompose((PetscObject)M,"iscol_o",(PetscObject)iscol_o);CHKERRQ(ierr);
3295     ierr = ISDestroy(&iscol_o);CHKERRQ(ierr);
3296   }
3297   PetscFunctionReturn(0);
3298 }
3299 
3300 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat,IS isrow,IS iscol,MatReuse call,Mat *newmat)
3301 {
3302   PetscErrorCode ierr;
3303   IS             iscol_local=NULL,isrow_d;
3304   PetscInt       csize;
3305   PetscInt       n,i,j,start,end;
3306   PetscBool      sameRowDist=PETSC_FALSE,sameDist[2],tsameDist[2];
3307   MPI_Comm       comm;
3308 
3309   PetscFunctionBegin;
3310   /* If isrow has same processor distribution as mat,
3311      call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
3312   if (call == MAT_REUSE_MATRIX) {
3313     ierr = PetscObjectQuery((PetscObject)*newmat,"isrow_d",(PetscObject*)&isrow_d);CHKERRQ(ierr);
3314     if (isrow_d) {
3315       sameRowDist  = PETSC_TRUE;
3316       tsameDist[1] = PETSC_TRUE; /* sameColDist */
3317     } else {
3318       ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3319       if (iscol_local) {
3320         sameRowDist  = PETSC_TRUE;
3321         tsameDist[1] = PETSC_FALSE; /* !sameColDist */
3322       }
3323     }
3324   } else {
3325     /* Check if isrow has same processor distribution as mat */
3326     sameDist[0] = PETSC_FALSE;
3327     ierr = ISGetLocalSize(isrow,&n);CHKERRQ(ierr);
3328     if (!n) {
3329       sameDist[0] = PETSC_TRUE;
3330     } else {
3331       ierr = ISGetMinMax(isrow,&i,&j);CHKERRQ(ierr);
3332       ierr = MatGetOwnershipRange(mat,&start,&end);CHKERRQ(ierr);
3333       if (i >= start && j < end) {
3334         sameDist[0] = PETSC_TRUE;
3335       }
3336     }
3337 
3338     /* Check if iscol has same processor distribution as mat */
3339     sameDist[1] = PETSC_FALSE;
3340     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3341     if (!n) {
3342       sameDist[1] = PETSC_TRUE;
3343     } else {
3344       ierr = ISGetMinMax(iscol,&i,&j);CHKERRQ(ierr);
3345       ierr = MatGetOwnershipRangeColumn(mat,&start,&end);CHKERRQ(ierr);
3346       if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
3347     }
3348 
3349     ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3350     ierr = MPIU_Allreduce(&sameDist,&tsameDist,2,MPIU_BOOL,MPI_LAND,comm);CHKERRQ(ierr);
3351     sameRowDist = tsameDist[0];
3352   }
3353 
3354   if (sameRowDist) {
3355     if (tsameDist[1]) { /* sameRowDist & sameColDist */
3356       /* isrow and iscol have same processor distribution as mat */
3357       ierr = MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat,isrow,iscol,call,newmat);CHKERRQ(ierr);
3358       PetscFunctionReturn(0);
3359     } else { /* sameRowDist */
3360       /* isrow has same processor distribution as mat */
3361       if (call == MAT_INITIAL_MATRIX) {
3362         PetscBool sorted;
3363         ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3364         ierr = ISGetLocalSize(iscol_local,&n);CHKERRQ(ierr); /* local size of iscol_local = global columns of newmat */
3365         ierr = ISGetSize(iscol,&i);CHKERRQ(ierr);
3366         if (n != i) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"n %D != size of iscol %D",n,i);
3367 
3368         ierr = ISSorted(iscol_local,&sorted);CHKERRQ(ierr);
3369         if (sorted) {
3370           /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
3371           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,iscol_local,MAT_INITIAL_MATRIX,newmat);CHKERRQ(ierr);
3372           PetscFunctionReturn(0);
3373         }
3374       } else { /* call == MAT_REUSE_MATRIX */
3375         IS iscol_sub;
3376         ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3377         if (iscol_sub) {
3378           ierr = MatCreateSubMatrix_MPIAIJ_SameRowDist(mat,isrow,iscol,NULL,call,newmat);CHKERRQ(ierr);
3379           PetscFunctionReturn(0);
3380         }
3381       }
3382     }
3383   }
3384 
3385   /* General case: iscol -> iscol_local which has global size of iscol */
3386   if (call == MAT_REUSE_MATRIX) {
3387     ierr = PetscObjectQuery((PetscObject)*newmat,"ISAllGather",(PetscObject*)&iscol_local);CHKERRQ(ierr);
3388     if (!iscol_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3389   } else {
3390     if (!iscol_local) {
3391       ierr = ISGetSeqIS_Private(mat,iscol,&iscol_local);CHKERRQ(ierr);
3392     }
3393   }
3394 
3395   ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3396   ierr = MatCreateSubMatrix_MPIAIJ_nonscalable(mat,isrow,iscol_local,csize,call,newmat);CHKERRQ(ierr);
3397 
3398   if (call == MAT_INITIAL_MATRIX) {
3399     ierr = PetscObjectCompose((PetscObject)*newmat,"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3400     ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3401   }
3402   PetscFunctionReturn(0);
3403 }
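
/*
   For orientation, the public entry point that reaches the routine above is
   MatCreateSubMatrix(); a typical call extracting the rows and columns selected by
   user-created index sets isrow and iscol (hypothetical here) looks like

     Mat sub;
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_INITIAL_MATRIX,&sub);CHKERRQ(ierr);
     ... later, refill sub with the same layout ...
     ierr = MatCreateSubMatrix(A,isrow,iscol,MAT_REUSE_MATRIX,&sub);CHKERRQ(ierr);
*/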
3404 
3405 /*@C
3406      MatCreateMPIAIJWithSeqAIJ - creates an MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal"
3407          and "off-diagonal" parts of the matrix in CSR format.
3408 
3409    Collective
3410 
3411    Input Parameters:
3412 +  comm - MPI communicator
3413 .  A - "diagonal" portion of matrix
3414 .  B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
3415 -  garray - global index of B columns
3416 
3417    Output Parameter:
3418 .   mat - the matrix, with input A as its local diagonal matrix
3419    Level: advanced
3420 
3421    Notes:
3422        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix.
3423        A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.
3424 
3425 .seealso: MatCreateMPIAIJWithSplitArrays()
3426 @*/
3427 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm,Mat A,Mat B,const PetscInt garray[],Mat *mat)
3428 {
3429   PetscErrorCode    ierr;
3430   Mat_MPIAIJ        *maij;
3431   Mat_SeqAIJ        *b=(Mat_SeqAIJ*)B->data,*bnew;
3432   PetscInt          *oi=b->i,*oj=b->j,i,nz,col;
3433   const PetscScalar *oa;
3434   Mat               Bnew;
3435   PetscInt          m,n,N;
3436 
3437   PetscFunctionBegin;
3438   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3439   ierr = MatGetSize(A,&m,&n);CHKERRQ(ierr);
3440   if (m != B->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Am %D != Bm %D",m,B->rmap->N);
3441   if (A->rmap->bs != B->rmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A row bs %D != B row bs %D",A->rmap->bs,B->rmap->bs);
3442   /* The check below is intentionally disabled: when B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be the same as A's */
3443   /* if (A->cmap->bs != B->cmap->bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %D != B column bs %D",A->cmap->bs,B->cmap->bs); */
3444 
3445   /* Get global columns of mat */
3446   ierr = MPIU_Allreduce(&n,&N,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3447 
3448   ierr = MatSetSizes(*mat,m,n,PETSC_DECIDE,N);CHKERRQ(ierr);
3449   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
3450   ierr = MatSetBlockSizes(*mat,A->rmap->bs,A->cmap->bs);CHKERRQ(ierr);
3451   maij = (Mat_MPIAIJ*)(*mat)->data;
3452 
3453   (*mat)->preallocated = PETSC_TRUE;
3454 
3455   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
3456   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
3457 
3458   /* Set A as diagonal portion of *mat */
3459   maij->A = A;
3460 
3461   nz = oi[m];
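  /* Convert the compressed column indices of B (local to its nonzero columns) into
     global column indices of the new parallel matrix using garray */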
3462   for (i=0; i<nz; i++) {
3463     col   = oj[i];
3464     oj[i] = garray[col];
3465   }
3466 
3467   /* Set Bnew as off-diagonal portion of *mat */
3468   ierr = MatSeqAIJGetArrayRead(B,&oa);CHKERRQ(ierr);
3469   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,N,oi,oj,(PetscScalar*)oa,&Bnew);CHKERRQ(ierr);
3470   ierr = MatSeqAIJRestoreArrayRead(B,&oa);CHKERRQ(ierr);
3471   bnew        = (Mat_SeqAIJ*)Bnew->data;
3472   bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
3473   maij->B     = Bnew;
3474 
3475   if (B->rmap->N != Bnew->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BN %D != BnewN %D",B->rmap->N,Bnew->rmap->N);
3476 
3477   b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
3478   b->free_a       = PETSC_FALSE;
3479   b->free_ij      = PETSC_FALSE;
3480   ierr = MatDestroy(&B);CHKERRQ(ierr);
3481 
3482   bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
3483   bnew->free_a       = PETSC_TRUE;
3484   bnew->free_ij      = PETSC_TRUE;
3485 
3486   /* condense columns of maij->B */
3487   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
3488   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3489   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3490   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
3491   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3492   PetscFunctionReturn(0);
3493 }
3494 
3495 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat,PetscInt,const IS[],const IS[],MatReuse,PetscBool,Mat*);
3496 
3497 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat,IS isrow,IS iscol,IS iscol_local,MatReuse call,Mat *newmat)
3498 {
3499   PetscErrorCode ierr;
3500   PetscInt       i,m,n,rstart,row,rend,nz,j,bs,cbs;
3501   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3502   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)mat->data;
3503   Mat            M,Msub,B=a->B;
3504   MatScalar      *aa;
3505   Mat_SeqAIJ     *aij;
3506   PetscInt       *garray = a->garray,*colsub,Ncols;
3507   PetscInt       count,Bn=B->cmap->N,cstart=mat->cmap->rstart,cend=mat->cmap->rend;
3508   IS             iscol_sub,iscmap;
3509   const PetscInt *is_idx,*cmap;
3510   PetscBool      allcolumns=PETSC_FALSE;
3511   MPI_Comm       comm;
3512 
3513   PetscFunctionBegin;
3514   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3515   if (call == MAT_REUSE_MATRIX) {
3516     ierr = PetscObjectQuery((PetscObject)*newmat,"SubIScol",(PetscObject*)&iscol_sub);CHKERRQ(ierr);
3517     if (!iscol_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"SubIScol passed in was not used before, cannot reuse");
3518     ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3519 
3520     ierr = PetscObjectQuery((PetscObject)*newmat,"Subcmap",(PetscObject*)&iscmap);CHKERRQ(ierr);
3521     if (!iscmap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcmap passed in was not used before, cannot reuse");
3522 
3523     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Msub);CHKERRQ(ierr);
3524     if (!Msub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3525 
3526     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_REUSE_MATRIX,PETSC_FALSE,&Msub);CHKERRQ(ierr);
3527 
3528   } else { /* call == MAT_INITIAL_MATRIX */
3529     PetscBool flg;
3530 
3531     ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3532     ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr);
3533 
3534     /* (1) iscol -> nonscalable iscol_local */
3535     /* Check for special case: each processor gets entire matrix columns */
3536     ierr = ISIdentity(iscol_local,&flg);CHKERRQ(ierr);
3537     if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3538     ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3539     if (allcolumns) {
3540       iscol_sub = iscol_local;
3541       ierr = PetscObjectReference((PetscObject)iscol_local);CHKERRQ(ierr);
3542       ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&iscmap);CHKERRQ(ierr);
3543 
3544     } else {
3545       /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
3546       PetscInt *idx,*cmap1,k;
3547       ierr = PetscMalloc1(Ncols,&idx);CHKERRQ(ierr);
3548       ierr = PetscMalloc1(Ncols,&cmap1);CHKERRQ(ierr);
3549       ierr = ISGetIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3550       count = 0;
3551       k     = 0;
3552       for (i=0; i<Ncols; i++) {
3553         j = is_idx[i];
3554         if (j >= cstart && j < cend) {
3555           /* diagonal part of mat */
3556           idx[count]     = j;
3557           cmap1[count++] = i; /* column index in submat */
3558         } else if (Bn) {
3559           /* off-diagonal part of mat */
3560           if (j == garray[k]) {
3561             idx[count]     = j;
3562             cmap1[count++] = i;  /* column index in submat */
3563           } else if (j > garray[k]) {
3564             while (j > garray[k] && k < Bn-1) k++;
3565             if (j == garray[k]) {
3566               idx[count]     = j;
3567               cmap1[count++] = i; /* column index in submat */
3568             }
3569           }
3570         }
3571       }
3572       ierr = ISRestoreIndices(iscol_local,&is_idx);CHKERRQ(ierr);
3573 
3574       ierr = ISCreateGeneral(PETSC_COMM_SELF,count,idx,PETSC_OWN_POINTER,&iscol_sub);CHKERRQ(ierr);
3575       ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3576       ierr = ISSetBlockSize(iscol_sub,cbs);CHKERRQ(ierr);
3577 
3578       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local),count,cmap1,PETSC_OWN_POINTER,&iscmap);CHKERRQ(ierr);
3579     }
3580 
3581     /* (3) Create sequential Msub */
3582     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol_sub,MAT_INITIAL_MATRIX,allcolumns,&Msub);CHKERRQ(ierr);
3583   }
3584 
3585   ierr = ISGetLocalSize(iscol_sub,&count);CHKERRQ(ierr);
3586   aij  = (Mat_SeqAIJ*)(Msub)->data;
3587   ii   = aij->i;
3588   ierr = ISGetIndices(iscmap,&cmap);CHKERRQ(ierr);
3589 
3590   /*
3591       m - number of local rows
3592       Ncols - number of columns (same on all processors)
3593       rstart - first row in new global matrix generated
3594   */
3595   ierr = MatGetSize(Msub,&m,NULL);CHKERRQ(ierr);
3596 
3597   if (call == MAT_INITIAL_MATRIX) {
3598     /* (4) Create parallel newmat */
3599     PetscMPIInt    rank,size;
3600     PetscInt       csize;
3601 
3602     ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3603     ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3604 
3605     /*
3606         Determine the number of non-zeros in the diagonal and off-diagonal
3607         portions of the matrix in order to do correct preallocation
3608     */
3609 
3610     /* first get start and end of "diagonal" columns */
3611     ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr);
3612     if (csize == PETSC_DECIDE) {
3613       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3614       if (mglobal == Ncols) { /* square matrix */
3615         nlocal = m;
3616       } else {
3617         nlocal = Ncols/size + ((Ncols % size) > rank);
3618       }
3619     } else {
3620       nlocal = csize;
3621     }
3622     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRMPI(ierr);
3623     rstart = rend - nlocal;
3624     if (rank == size - 1 && rend != Ncols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,Ncols);
3625 
3626     /* next, compute all the lengths */
3627     jj    = aij->j;
3628     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3629     olens = dlens + m;
3630     for (i=0; i<m; i++) {
3631       jend = ii[i+1] - ii[i];
3632       olen = 0;
3633       dlen = 0;
3634       for (j=0; j<jend; j++) {
3635         if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
3636         else dlen++;
3637         jj++;
3638       }
3639       olens[i] = olen;
3640       dlens[i] = dlen;
3641     }
3642 
3643     ierr = ISGetBlockSize(isrow,&bs);CHKERRQ(ierr);
3644     ierr = ISGetBlockSize(iscol,&cbs);CHKERRQ(ierr);
3645 
3646     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3647     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,Ncols);CHKERRQ(ierr);
3648     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3649     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3650     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3651     ierr = PetscFree(dlens);CHKERRQ(ierr);
3652 
3653   } else { /* call == MAT_REUSE_MATRIX */
3654     M    = *newmat;
3655     ierr = MatGetLocalSize(M,&i,NULL);CHKERRQ(ierr);
3656     if (i != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3657     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3658     /*
3659          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3660        rather than the slower MatSetValues().
3661     */
3662     M->was_assembled = PETSC_TRUE;
3663     M->assembled     = PETSC_FALSE;
3664   }
3665 
3666   /* (5) Set values of Msub to *newmat */
3667   ierr = PetscMalloc1(count,&colsub);CHKERRQ(ierr);
3668   ierr = MatGetOwnershipRange(M,&rstart,NULL);CHKERRQ(ierr);
3669 
3670   jj   = aij->j;
3671   ierr = MatSeqAIJGetArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
3672   for (i=0; i<m; i++) {
3673     row = rstart + i;
3674     nz  = ii[i+1] - ii[i];
3675     for (j=0; j<nz; j++) colsub[j] = cmap[jj[j]];
3676     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,colsub,aa,INSERT_VALUES);CHKERRQ(ierr);
3677     jj += nz; aa += nz;
3678   }
3679   ierr = MatSeqAIJRestoreArrayRead(Msub,(const PetscScalar**)&aa);CHKERRQ(ierr);
3680   ierr = ISRestoreIndices(iscmap,&cmap);CHKERRQ(ierr);
3681 
3682   ierr = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3683   ierr = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3684 
3685   ierr = PetscFree(colsub);CHKERRQ(ierr);
3686 
3687   /* save Msub, iscol_sub and iscmap used in processor for next request */
3688   if (call == MAT_INITIAL_MATRIX) {
3689     *newmat = M;
3690     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubMatrix",(PetscObject)Msub);CHKERRQ(ierr);
3691     ierr = MatDestroy(&Msub);CHKERRQ(ierr);
3692 
3693     ierr = PetscObjectCompose((PetscObject)(*newmat),"SubIScol",(PetscObject)iscol_sub);CHKERRQ(ierr);
3694     ierr = ISDestroy(&iscol_sub);CHKERRQ(ierr);
3695 
3696     ierr = PetscObjectCompose((PetscObject)(*newmat),"Subcmap",(PetscObject)iscmap);CHKERRQ(ierr);
3697     ierr = ISDestroy(&iscmap);CHKERRQ(ierr);
3698 
3699     if (iscol_local) {
3700       ierr = PetscObjectCompose((PetscObject)(*newmat),"ISAllGather",(PetscObject)iscol_local);CHKERRQ(ierr);
3701       ierr = ISDestroy(&iscol_local);CHKERRQ(ierr);
3702     }
3703   }
3704   PetscFunctionReturn(0);
3705 }
3706 
3707 /*
3708     Not great since it makes two copies of the submatrix: first a SeqAIJ on each
3709   process, from which the final parallel result is then assembled by concatenating
3710   the local matrices. Writing it directly would be much like MatCreateSubMatrices_MPIAIJ().
3711 
3712   Note: This requires a sequential iscol with all indices.
3713 */
3714 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat,IS isrow,IS iscol,PetscInt csize,MatReuse call,Mat *newmat)
3715 {
3716   PetscErrorCode ierr;
3717   PetscMPIInt    rank,size;
3718   PetscInt       i,m,n,rstart,row,rend,nz,*cwork,j,bs,cbs;
3719   PetscInt       *ii,*jj,nlocal,*dlens,*olens,dlen,olen,jend,mglobal;
3720   Mat            M,Mreuse;
3721   MatScalar      *aa,*vwork;
3722   MPI_Comm       comm;
3723   Mat_SeqAIJ     *aij;
3724   PetscBool      colflag,allcolumns=PETSC_FALSE;
3725 
3726   PetscFunctionBegin;
3727   ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr);
3728   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
3729   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3730 
3731   /* Check for special case: each processor gets entire matrix columns */
3732   ierr = ISIdentity(iscol,&colflag);CHKERRQ(ierr);
3733   ierr = ISGetLocalSize(iscol,&n);CHKERRQ(ierr);
3734   if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
3735   ierr = MPIU_Allreduce(MPI_IN_PLACE,&allcolumns,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
3736 
3737   if (call ==  MAT_REUSE_MATRIX) {
3738     ierr = PetscObjectQuery((PetscObject)*newmat,"SubMatrix",(PetscObject*)&Mreuse);CHKERRQ(ierr);
3739     if (!Mreuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Submatrix passed in was not used before, cannot reuse");
3740     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_REUSE_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3741   } else {
3742     ierr = MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,allcolumns,&Mreuse);CHKERRQ(ierr);
3743   }
3744 
3745   /*
3746       m - number of local rows
3747       n - number of columns (same on all processors)
3748       rstart - first row in new global matrix generated
3749   */
3750   ierr = MatGetSize(Mreuse,&m,&n);CHKERRQ(ierr);
3751   ierr = MatGetBlockSizes(Mreuse,&bs,&cbs);CHKERRQ(ierr);
3752   if (call == MAT_INITIAL_MATRIX) {
3753     aij = (Mat_SeqAIJ*)(Mreuse)->data;
3754     ii  = aij->i;
3755     jj  = aij->j;
3756 
3757     /*
3758         Determine the number of non-zeros in the diagonal and off-diagonal
3759         portions of the matrix in order to do correct preallocation
3760     */
3761 
3762     /* first get start and end of "diagonal" columns */
3763     if (csize == PETSC_DECIDE) {
3764       ierr = ISGetSize(isrow,&mglobal);CHKERRQ(ierr);
3765       if (mglobal == n) { /* square matrix */
3766         nlocal = m;
3767       } else {
3768         nlocal = n/size + ((n % size) > rank);
3769       }
3770     } else {
3771       nlocal = csize;
3772     }
3773     ierr   = MPI_Scan(&nlocal,&rend,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
3774     rstart = rend - nlocal;
3775     if (rank == size - 1 && rend != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local column sizes %D do not add up to total number of columns %D",rend,n);
3776 
3777     /* next, compute all the lengths */
3778     ierr  = PetscMalloc1(2*m+1,&dlens);CHKERRQ(ierr);
3779     olens = dlens + m;
3780     for (i=0; i<m; i++) {
3781       jend = ii[i+1] - ii[i];
3782       olen = 0;
3783       dlen = 0;
3784       for (j=0; j<jend; j++) {
3785         if (*jj < rstart || *jj >= rend) olen++;
3786         else dlen++;
3787         jj++;
3788       }
3789       olens[i] = olen;
3790       dlens[i] = dlen;
3791     }
3792     ierr = MatCreate(comm,&M);CHKERRQ(ierr);
3793     ierr = MatSetSizes(M,m,nlocal,PETSC_DECIDE,n);CHKERRQ(ierr);
3794     ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr);
3795     ierr = MatSetType(M,((PetscObject)mat)->type_name);CHKERRQ(ierr);
3796     ierr = MatMPIAIJSetPreallocation(M,0,dlens,0,olens);CHKERRQ(ierr);
3797     ierr = PetscFree(dlens);CHKERRQ(ierr);
3798   } else {
3799     PetscInt ml,nl;
3800 
3801     M    = *newmat;
3802     ierr = MatGetLocalSize(M,&ml,&nl);CHKERRQ(ierr);
3803     if (ml != m) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Previous matrix must be same size/layout as request");
3804     ierr = MatZeroEntries(M);CHKERRQ(ierr);
3805     /*
3806          The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
3807        rather than the slower MatSetValues().
3808     */
3809     M->was_assembled = PETSC_TRUE;
3810     M->assembled     = PETSC_FALSE;
3811   }
3812   ierr = MatGetOwnershipRange(M,&rstart,&rend);CHKERRQ(ierr);
3813   aij  = (Mat_SeqAIJ*)(Mreuse)->data;
3814   ii   = aij->i;
3815   jj   = aij->j;
3816 
3817   /* trigger copy to CPU if needed */
3818   ierr = MatSeqAIJGetArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
3819   for (i=0; i<m; i++) {
3820     row   = rstart + i;
3821     nz    = ii[i+1] - ii[i];
3822     cwork = jj; jj += nz;
3823     vwork = aa; aa += nz;
3824     ierr  = MatSetValues_MPIAIJ(M,1,&row,nz,cwork,vwork,INSERT_VALUES);CHKERRQ(ierr);
3825   }
3826   ierr = MatSeqAIJRestoreArrayRead(Mreuse,(const PetscScalar**)&aa);CHKERRQ(ierr);
3827 
3828   ierr    = MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3829   ierr    = MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3830   *newmat = M;
3831 
3832   /* save submatrix used in processor for next request */
3833   if (call ==  MAT_INITIAL_MATRIX) {
3834     ierr = PetscObjectCompose((PetscObject)M,"SubMatrix",(PetscObject)Mreuse);CHKERRQ(ierr);
3835     ierr = MatDestroy(&Mreuse);CHKERRQ(ierr);
3836   }
3837   PetscFunctionReturn(0);
3838 }
3839 
3840 PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
3841 {
3842   PetscInt       m,cstart, cend,j,nnz,i,d;
3843   PetscInt       *d_nnz,*o_nnz,nnz_max = 0,rstart,ii;
3844   const PetscInt *JJ;
3845   PetscErrorCode ierr;
3846   PetscBool      nooffprocentries;
3847 
3848   PetscFunctionBegin;
3849   if (Ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Ii[0] must be 0, it is %D",Ii[0]);
3850 
3851   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3852   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3853   m      = B->rmap->n;
3854   cstart = B->cmap->rstart;
3855   cend   = B->cmap->rend;
3856   rstart = B->rmap->rstart;
3857 
3858   ierr = PetscCalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
3859 
3860   if (PetscDefined(USE_DEBUG)) {
3861     for (i=0; i<m; i++) {
3862       nnz = Ii[i+1]- Ii[i];
3863       JJ  = J + Ii[i];
3864       if (nnz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nnz);
3865       if (nnz && (JJ[0] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D starts with negative column index %D",i,JJ[0]);
3866       if (nnz && (JJ[nnz-1] >= B->cmap->N)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Row %D ends with too large a column index %D (max allowed %D)",i,JJ[nnz-1],B->cmap->N);
3867     }
3868   }
3869 
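  /* Count, for each local row, how many column indices fall in the local diagonal column
     range [cstart,cend) (d_nnz) versus outside it (o_nnz), so that the preallocation below
     exactly matches the values inserted afterwards */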
3870   for (i=0; i<m; i++) {
3871     nnz     = Ii[i+1]- Ii[i];
3872     JJ      = J + Ii[i];
3873     nnz_max = PetscMax(nnz_max,nnz);
3874     d       = 0;
3875     for (j=0; j<nnz; j++) {
3876       if (cstart <= JJ[j] && JJ[j] < cend) d++;
3877     }
3878     d_nnz[i] = d;
3879     o_nnz[i] = nnz - d;
3880   }
3881   ierr = MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
3882   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
3883 
3884   for (i=0; i<m; i++) {
3885     ii   = i + rstart;
3886     ierr = MatSetValues_MPIAIJ(B,1,&ii,Ii[i+1] - Ii[i],J+Ii[i], v ? v + Ii[i] : NULL,INSERT_VALUES);CHKERRQ(ierr);
3887   }
3888   nooffprocentries    = B->nooffprocentries;
3889   B->nooffprocentries = PETSC_TRUE;
3890   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3891   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3892   B->nooffprocentries = nooffprocentries;
3893 
3894   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3895   PetscFunctionReturn(0);
3896 }
3897 
3898 /*@
3899    MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
3900    (the default parallel PETSc format).
3901 
3902    Collective
3903 
3904    Input Parameters:
3905 +  B - the matrix
3906 .  i - the indices into j for the start of each local row (starts with zero)
3907 .  j - the column indices for each local row (starts with zero)
3908 -  v - optional values in the matrix
3909 
3910    Level: developer
3911 
3912    Notes:
3913        The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
3914      thus you CANNOT change the matrix entries by changing the values of v[] after you have
3915      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
3916 
3917        The i and j indices are 0 based, and the i array holds offsets into the local j array.
3918 
3919        The format which is used for the sparse matrix input is equivalent to a
3920     row-major ordering, i.e. for the following matrix, the input data expected is
3921     as shown
3922 
3923 $        1 0 0
3924 $        2 0 3     P0
3925 $       -------
3926 $        4 5 6     P1
3927 $
3928 $     Process0 [P0]: rows_owned=[0,1]
3929 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
3930 $        j =  {0,0,2}  [size = 3]
3931 $        v =  {1,2,3}  [size = 3]
3932 $
3933 $     Process1 [P1]: rows_owned=[2]
3934 $        i =  {0,3}    [size = nrow+1  = 1+1]
3935 $        j =  {0,1,2}  [size = 3]
3936 $        v =  {4,5,6}  [size = 3]
3937 
3938 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatCreateAIJ(), MATMPIAIJ,
3939           MatCreateSeqAIJWithArrays(), MatCreateMPIAIJWithSplitArrays()
3940 @*/
3941 PetscErrorCode  MatMPIAIJSetPreallocationCSR(Mat B,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3942 {
3943   PetscErrorCode ierr;
3944 
3945   PetscFunctionBegin;
3946   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocationCSR_C",(Mat,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,i,j,v));CHKERRQ(ierr);
3947   PetscFunctionReturn(0);
3948 }
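
/*
   Example (added illustration, not part of the original source): a minimal sketch of building an
   MPIAIJ matrix from local CSR arrays with MatMPIAIJSetPreallocationCSR(). The helper name
   BuildFromLocalCSR and the choice of PETSC_DECIDE/PETSC_DETERMINE for the sizes are illustrative
   only; they simply show the call sequence.

     static PetscErrorCode BuildFromLocalCSR(MPI_Comm comm,PetscInt m,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar v[],Mat *A)
     {
       PetscErrorCode ierr;

       PetscFunctionBegin;
       ierr = MatCreate(comm,A);CHKERRQ(ierr);
       ierr = MatSetSizes(*A,m,PETSC_DECIDE,PETSC_DETERMINE,N);CHKERRQ(ierr);
       ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocationCSR(*A,i,j,v);CHKERRQ(ierr);   // preallocates, inserts the values and assembles
       PetscFunctionReturn(0);
     }
*/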
3949 
3950 /*@C
3951    MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
3952    (the default parallel PETSc format).  For good matrix assembly performance
3953    the user should preallocate the matrix storage by setting the parameters
3954    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
3955    performance can be increased by more than a factor of 50.
3956 
3957    Collective
3958 
3959    Input Parameters:
3960 +  B - the matrix
3961 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
3962            (same value is used for all local rows)
3963 .  d_nnz - array containing the number of nonzeros in the various rows of the
3964            DIAGONAL portion of the local submatrix (possibly different for each row)
3965            or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
3966            The size of this array is equal to the number of local rows, i.e 'm'.
3967            For matrices that will be factored, you must leave room for (and set)
3968            the diagonal entry even if it is zero.
3969 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
3970            submatrix (same value is used for all local rows).
3971 -  o_nnz - array containing the number of nonzeros in the various rows of the
3972            OFF-DIAGONAL portion of the local submatrix (possibly different for
3973            each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
3974            structure. The size of this array is equal to the number
3975            of local rows, i.e 'm'.
3976 
3977    If the *_nnz parameter is given then the *_nz parameter is ignored
3978 
3979    The AIJ format (also called the Yale sparse matrix format or
3980    compressed row storage (CSR)) is fully compatible with standard Fortran 77
3981    storage.  The stored row and column indices begin with zero.
3982    See Users-Manual: ch_mat for details.
3983 
3984    The parallel matrix is partitioned such that the first m0 rows belong to
3985    process 0, the next m1 rows belong to process 1, the next m2 rows belong
3986    to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
3987 
3988    The DIAGONAL portion of the local submatrix of a processor can be defined
3989    as the submatrix which is obtained by extracting the part corresponding to
3990    the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
3991    first row that belongs to the processor, r2 is the last row belonging to
3992    this processor, and c1-c2 is the range of indices of the local part of a
3993    vector suitable for applying the matrix to.  This is an mxn matrix.  In the
3994    common case of a square matrix, the row and column ranges are the same and
3995    the DIAGONAL part is also square. The remaining portion of the local
3996    submatrix [m x (N-n)] constitutes the OFF-DIAGONAL portion.
3997 
3998    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
3999 
4000    You can call MatGetInfo() to get information on how effective the preallocation was;
4001    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
4002    You can also run with the option -info and look for messages with the string
4003    malloc in them to see if additional memory allocation was needed.
4004 
4005    Example usage:
4006 
4007    Consider the following 8x8 matrix with 34 non-zero values, that is
4008    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4009    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4010    as follows:
4011 
4012 .vb
4013             1  2  0  |  0  3  0  |  0  4
4014     Proc0   0  5  6  |  7  0  0  |  8  0
4015             9  0 10  | 11  0  0  | 12  0
4016     -------------------------------------
4017            13  0 14  | 15 16 17  |  0  0
4018     Proc1   0 18  0  | 19 20 21  |  0  0
4019             0  0  0  | 22 23  0  | 24  0
4020     -------------------------------------
4021     Proc2  25 26 27  |  0  0 28  | 29  0
4022            30  0  0  | 31 32 33  |  0 34
4023 .ve
4024 
4025    This can be represented as a collection of submatrices as:
4026 
4027 .vb
4028       A B C
4029       D E F
4030       G H I
4031 .ve
4032 
4033    Where the submatrices A,B,C are owned by proc0, D,E,F are
4034    owned by proc1, G,H,I are owned by proc2.
4035 
4036    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4037    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4038    The 'M','N' parameters are 8,8, and have the same values on all procs.
4039 
4040    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4041    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4042    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4043    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4044    part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
4045    matrix, and [DF] as another SeqAIJ matrix.
4046 
4047    When d_nz, o_nz parameters are specified, d_nz storage elements are
4048    allocated for every row of the local diagonal submatrix, and o_nz
4049    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4050    One way to choose d_nz and o_nz is to use the maximum number of nonzeros per
4051    local row for each of the local DIAGONAL and OFF-DIAGONAL submatrices.
4052    In this case, the values of d_nz,o_nz are:
4053 .vb
4054      proc0 : dnz = 2, o_nz = 2
4055      proc1 : dnz = 3, o_nz = 2
4056      proc2 : dnz = 1, o_nz = 4
4057 .ve
4058    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4059    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4060    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4061    34 values.
4062 
4063    When d_nnz, o_nnz parameters are specified, the storage is specified
4064    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4065    In the above case the values for d_nnz,o_nnz are:
4066 .vb
4067      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4068      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4069      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4070 .ve
4071    Here the space allocated is the sum of all the above values, i.e. 34, and
4072    hence the preallocation is perfect.
4073 
4074    Level: intermediate
4075 
4076 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateAIJ(), MatMPIAIJSetPreallocationCSR(),
4077           MATMPIAIJ, MatGetInfo(), PetscSplitOwnership()
4078 @*/
4079 PetscErrorCode MatMPIAIJSetPreallocation(Mat B,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
4080 {
4081   PetscErrorCode ierr;
4082 
4083   PetscFunctionBegin;
4084   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
4085   PetscValidType(B,1);
4086   ierr = PetscTryMethod(B,"MatMPIAIJSetPreallocation_C",(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
4087   PetscFunctionReturn(0);
4088 }
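
/*
   Example (added illustration, not part of the original source): a minimal sketch showing how the
   d_nnz/o_nnz arrays from the 8x8 example in the manual page above could be supplied, seen from
   proc0; the other ranks would pass their own local sizes and arrays. The helper name
   BuildProc0Rows and the hard-coded counts are hypothetical and only illustrate the call sequence.

     static PetscErrorCode BuildProc0Rows(MPI_Comm comm,Mat *A)
     {
       PetscErrorCode ierr;
       const PetscInt d_nnz[3] = {2,2,2};   // nonzeros per local row in the DIAGONAL block on proc0
       const PetscInt o_nnz[3] = {2,2,2};   // nonzeros per local row in the OFF-DIAGONAL block on proc0

       PetscFunctionBegin;
       ierr = MatCreate(comm,A);CHKERRQ(ierr);
       ierr = MatSetSizes(*A,3,3,8,8);CHKERRQ(ierr);                   // proc0 owns 3 rows and 3 columns
       ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
       ierr = MatMPIAIJSetPreallocation(*A,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
       PetscFunctionReturn(0);
     }
*/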
4089 
4090 /*@
4091      MatCreateMPIAIJWithArrays - creates an MPI AIJ matrix using arrays that contain the local rows in standard
4092          CSR format.
4093 
4094    Collective
4095 
4096    Input Parameters:
4097 +  comm - MPI communicator
4098 .  m - number of local rows (Cannot be PETSC_DECIDE)
4099 .  n - This value should be the same as the local size used in creating the
4100        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4101        calculated if N is given) For square matrices n is almost always m.
4102 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4103 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4104 .   i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
4105 .   j - column indices
4106 -   a - matrix values
4107 
4108    Output Parameter:
4109 .   mat - the matrix
4110 
4111    Level: intermediate
4112 
4113    Notes:
4114        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
4115      thus you CANNOT change the matrix entries by changing the values of a[] after you have
4116      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
4117 
4118        The i and j indices are 0 based, and the i array holds offsets into the local j array.
4119 
4120        The format which is used for the sparse matrix input is equivalent to a
4121     row-major ordering, i.e. for the following matrix, the input data expected is
4122     as shown
4123 
4124        Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays().
4125 
4126 $        1 0 0
4127 $        2 0 3     P0
4128 $       -------
4129 $        4 5 6     P1
4130 $
4131 $     Process0 [P0]: rows_owned=[0,1]
4132 $        i =  {0,1,3}  [size = nrow+1  = 2+1]
4133 $        j =  {0,0,2}  [size = 3]
4134 $        v =  {1,2,3}  [size = 3]
4135 $
4136 $     Process1 [P1]: rows_owned=[2]
4137 $        i =  {0,3}    [size = nrow+1  = 1+1]
4138 $        j =  {0,1,2}  [size = 3]
4139 $        v =  {4,5,6}  [size = 3]
4140 
4141 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4142           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4143 @*/
4144 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
4145 {
4146   PetscErrorCode ierr;
4147 
4148   PetscFunctionBegin;
4149   if (i && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4150   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4151   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
4152   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
4153   /* ierr = MatSetBlockSizes(M,bs,cbs);CHKERRQ(ierr); */
4154   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
4155   ierr = MatMPIAIJSetPreallocationCSR(*mat,i,j,a);CHKERRQ(ierr);
4156   PetscFunctionReturn(0);
4157 }
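
/*
   Example (added illustration, not part of the original source): a sketch of the process-0 call for
   the 3x3 example listed in the manual page above, assuming a two-process run. The helper name
   CreateP0Part is hypothetical; on the other rank the corresponding P1 arrays would be passed.

     static PetscErrorCode CreateP0Part(MPI_Comm comm,Mat *A)
     {
       PetscErrorCode    ierr;
       const PetscInt    i[] = {0,1,3};      // row offsets for the 2 locally owned rows
       const PetscInt    j[] = {0,0,2};      // global column indices
       const PetscScalar v[] = {1,2,3};      // the corresponding values

       PetscFunctionBegin;
       ierr = MatCreateMPIAIJWithArrays(comm,2,PETSC_DECIDE,PETSC_DETERMINE,3,i,j,v,A);CHKERRQ(ierr);
       PetscFunctionReturn(0);
     }
*/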
4158 
4159 /*@
4160      MatUpdateMPIAIJWithArrays - updates an MPI AIJ matrix using arrays that contain the local rows in standard
4161          CSR format. Only the numerical values are updated; the other arrays must be identical to those used to create the matrix
4162 
4163    Collective
4164 
4165    Input Parameters:
4166 +  mat - the matrix
4167 .  m - number of local rows (Cannot be PETSC_DECIDE)
4168 .  n - This value should be the same as the local size used in creating the
4169        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4170        calculated if N is given) For square matrices n is almost always m.
4171 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4172 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4173 .  Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
4174 .  J - column indices
4175 -  v - matrix values
4176 
4177    Level: intermediate
4178 
4179 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4180           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays(), MatUpdateMPIAIJWithArrays()
4181 @*/
4182 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt Ii[],const PetscInt J[],const PetscScalar v[])
4183 {
4184   PetscErrorCode ierr;
4185   PetscInt       cstart,nnz,i,j;
4186   PetscInt       *ld;
4187   PetscBool      nooffprocentries;
4188   Mat_MPIAIJ     *Aij = (Mat_MPIAIJ*)mat->data;
4189   Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ*)Aij->A->data, *Ao  = (Mat_SeqAIJ*)Aij->B->data;
4190   PetscScalar    *ad = Ad->a, *ao = Ao->a;
4191   const PetscInt *Adi = Ad->i;
4192   PetscInt       ldi,Iii,md;
4193 
4194   PetscFunctionBegin;
4195   if (Ii[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
4196   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
4197   if (m != mat->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
4198   if (n != mat->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");
4199 
4200   cstart = mat->cmap->rstart;
4201   if (!Aij->ld) {
4202     /* count number of entries below block diagonal */
4203     ierr    = PetscCalloc1(m,&ld);CHKERRQ(ierr);
4204     Aij->ld = ld;
4205     for (i=0; i<m; i++) {
4206       nnz  = Ii[i+1]- Ii[i];
4207       j     = 0;
4208       while (j < nnz && J[j] < cstart) {j++;}
4209       J    += nnz;
4210       ld[i] = j;
4211     }
4212   } else {
4213     ld = Aij->ld;
4214   }
4215 
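  /* For each local row the CSR values in v are laid out as: the entries left of the diagonal block
     (ld[i] of them), then the diagonal-block entries (md of them), then the remaining off-diagonal
     entries; copy each piece into the matching position of the off-diagonal (ao) and diagonal (ad)
     value arrays */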
4216   for (i=0; i<m; i++) {
4217     nnz  = Ii[i+1]- Ii[i];
4218     Iii  = Ii[i];
4219     ldi  = ld[i];
4220     md   = Adi[i+1]-Adi[i];
4221     ierr = PetscArraycpy(ao,v + Iii,ldi);CHKERRQ(ierr);
4222     ierr = PetscArraycpy(ad,v + Iii + ldi,md);CHKERRQ(ierr);
4223     ierr = PetscArraycpy(ao + ldi,v + Iii + ldi + md,nnz - ldi - md);CHKERRQ(ierr);
4224     ad  += md;
4225     ao  += nnz - md;
4226   }
4227   nooffprocentries      = mat->nooffprocentries;
4228   mat->nooffprocentries = PETSC_TRUE;
4229   ierr = PetscObjectStateIncrease((PetscObject)Aij->A);CHKERRQ(ierr);
4230   ierr = PetscObjectStateIncrease((PetscObject)Aij->B);CHKERRQ(ierr);
4231   ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
4232   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4233   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4234   mat->nooffprocentries = nooffprocentries;
4235   PetscFunctionReturn(0);
4236 }
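
/*
   Example (added illustration, not part of the original source): a sketch of refreshing the
   numerical values of a matrix previously built with MatCreateMPIAIJWithArrays(), reusing the same
   i and j arrays. The helper name RefreshValues is hypothetical; m and n are the local sizes used
   at creation time.

     static PetscErrorCode RefreshValues(Mat A,PetscInt m,PetscInt n,const PetscInt i[],const PetscInt j[],const PetscScalar vnew[])
     {
       PetscErrorCode ierr;

       PetscFunctionBegin;
       // the sparsity pattern (i,j) must be identical to the one used when the matrix was created
       ierr = MatUpdateMPIAIJWithArrays(A,m,n,PETSC_DETERMINE,PETSC_DETERMINE,i,j,vnew);CHKERRQ(ierr);
       PetscFunctionReturn(0);
     }
*/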
4237 
4238 /*@C
4239    MatCreateAIJ - Creates a sparse parallel matrix in AIJ format
4240    (the default parallel PETSc format).  For good matrix assembly performance
4241    the user should preallocate the matrix storage by setting the parameters
4242    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
4243    performance can be increased by more than a factor of 50.
4244 
4245    Collective
4246 
4247    Input Parameters:
4248 +  comm - MPI communicator
4249 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
4250            This value should be the same as the local size used in creating the
4251            y vector for the matrix-vector product y = Ax.
4252 .  n - This value should be the same as the local size used in creating the
4253        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
4254        calculated if N is given) For square matrices n is almost always m.
4255 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
4256 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
4257 .  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
4258            (same value is used for all local rows)
4259 .  d_nnz - array containing the number of nonzeros in the various rows of the
4260            DIAGONAL portion of the local submatrix (possibly different for each row)
4261            or NULL, if d_nz is used to specify the nonzero structure.
4262            The size of this array is equal to the number of local rows, i.e 'm'.
4263 .  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
4264            submatrix (same value is used for all local rows).
4265 -  o_nnz - array containing the number of nonzeros in the various rows of the
4266            OFF-DIAGONAL portion of the local submatrix (possibly different for
4267            each row) or NULL, if o_nz is used to specify the nonzero
4268            structure. The size of this array is equal to the number
4269            of local rows, i.e 'm'.
4270 
4271    Output Parameter:
4272 .  A - the matrix
4273 
4274    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
4275    MatXXXXSetPreallocation() paradigm instead of this routine directly.
4276    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
4277 
4278    Notes:
4279    If the *_nnz parameter is given then the *_nz parameter is ignored
4280 
4281    m,n,M,N parameters specify the size of the matrix, and its partitioning across
4282    processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
4283    storage requirements for this matrix.
4284 
4285    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
4286    processor then it must be used on all processors that share the object for
4287    that argument.
4288 
4289    The user MUST specify either the local or global matrix dimensions
4290    (possibly both).
4291 
4292    The parallel matrix is partitioned across processors such that the
4293    first m0 rows belong to process 0, the next m1 rows belong to
4294    process 1, the next m2 rows belong to process 2, etc., where
4295    m0,m1,m2,... are the input parameter 'm', i.e. each processor stores
4296    values corresponding to an [m x N] submatrix.
4297 
4298    The columns are logically partitioned with the n0 columns belonging
4299    to 0th partition, the next n1 columns belonging to the next
4300    partition etc.. where n0,n1,n2... are the input parameter 'n'.
4301 
4302    The DIAGONAL portion of the local submatrix on any given processor
4303    is the submatrix formed by the m rows and the n columns owned by
4304    the given processor, i.e. the diagonal matrix on
4305    process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
4306    etc. The remaining portion of the local submatrix [m x (N-n)]
4307    constitutes the OFF-DIAGONAL portion. The example below better
4308    illustrates this concept.
4309 
4310    For a square global matrix we define each processor's diagonal portion
4311    to be its local rows and the corresponding columns (a square submatrix);
4312    each processor's off-diagonal portion encompasses the remainder of the
4313    local matrix (a rectangular submatrix).
4314 
4315    If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4316 
4317    When calling this routine with a single process communicator, a matrix of
4318    type SEQAIJ is returned.  If a matrix of type MPIAIJ is desired for this
4319    type of communicator, use the construction mechanism
4320 .vb
4321      MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...);
4322 .ve
4323 
4329    By default, this format uses inodes (identical nodes) when possible.
4330    We search for consecutive rows with the same nonzero structure, thereby
4331    reusing matrix information to achieve increased efficiency.
4332 
4333    Options Database Keys:
4334 +  -mat_no_inode  - Do not use inodes
4335 -  -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4336 
4337 
4338 
4339    Example usage:
4340 
4341    Consider the following 8x8 matrix with 34 non-zero values, that is
4342    assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4343    proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4344    as follows
4345 
4346 .vb
4347             1  2  0  |  0  3  0  |  0  4
4348     Proc0   0  5  6  |  7  0  0  |  8  0
4349             9  0 10  | 11  0  0  | 12  0
4350     -------------------------------------
4351            13  0 14  | 15 16 17  |  0  0
4352     Proc1   0 18  0  | 19 20 21  |  0  0
4353             0  0  0  | 22 23  0  | 24  0
4354     -------------------------------------
4355     Proc2  25 26 27  |  0  0 28  | 29  0
4356            30  0  0  | 31 32 33  |  0 34
4357 .ve
4358 
4359    This can be represented as a collection of submatrices as
4360 
4361 .vb
4362       A B C
4363       D E F
4364       G H I
4365 .ve
4366 
4367    Where the submatrices A,B,C are owned by proc0, D,E,F are
4368    owned by proc1, G,H,I are owned by proc2.
4369 
4370    The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4371    The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4372    The 'M','N' parameters are 8,8, and have the same values on all procs.
4373 
4374    The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4375    submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4376    corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4377    Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4378    part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
4379    matrix, ans [DF] as another SeqAIJ matrix.
4380 
4381    When d_nz, o_nz parameters are specified, d_nz storage elements are
4382    allocated for every row of the local diagonal submatrix, and o_nz
4383    storage locations are allocated for every row of the OFF-DIAGONAL submat.
4384    One way to choose d_nz and o_nz is to use the max nonzerors per local
4385    rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4386    In this case, the values of d_nz,o_nz are
4387 .vb
4388      proc0 : dnz = 2, o_nz = 2
4389      proc1 : dnz = 3, o_nz = 2
4390      proc2 : dnz = 1, o_nz = 4
4391 .ve
4392    We are allocating m*(d_nz+o_nz) storage locations for every proc. This
4393    translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4394    for proc2, i.e. we are using 12+15+10=37 storage locations to store
4395    34 values.
4396 
4397    When d_nnz, o_nnz parameters are specified, the storage is specified
4398    for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4399    In the above case the values for d_nnz,o_nnz are
4400 .vb
4401      proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
4402      proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
4403      proc2: d_nnz = [1,1]   and o_nnz = [4,4]
4404 .ve
4405    Here the space allocated is the sum of all the above values, i.e. 34, and
4406    hence the preallocation is perfect.
4407 
4408    Level: intermediate
4409 
4410 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
4411           MATMPIAIJ, MatCreateMPIAIJWithArrays()
4412 @*/
4413 PetscErrorCode  MatCreateAIJ(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
4414 {
4415   PetscErrorCode ierr;
4416   PetscMPIInt    size;
4417 
4418   PetscFunctionBegin;
4419   ierr = MatCreate(comm,A);CHKERRQ(ierr);
4420   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
4421   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4422   if (size > 1) {
4423     ierr = MatSetType(*A,MATMPIAIJ);CHKERRQ(ierr);
4424     ierr = MatMPIAIJSetPreallocation(*A,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
4425   } else {
4426     ierr = MatSetType(*A,MATSEQAIJ);CHKERRQ(ierr);
4427     ierr = MatSeqAIJSetPreallocation(*A,d_nz,d_nnz);CHKERRQ(ierr);
4428   }
4429   PetscFunctionReturn(0);
4430 }
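
/*
   Example (added illustration, not part of the original source): a minimal sketch of creating a
   square AIJ matrix with MatCreateAIJ() using rough per-row estimates, then filling its diagonal
   and assembling it. The helper name CreateAndFill and the estimates (5 diagonal / 2 off-diagonal
   nonzeros per row) are illustrative.

     static PetscErrorCode CreateAndFill(MPI_Comm comm,PetscInt N,Mat *A)
     {
       PetscErrorCode ierr;
       PetscInt       rstart,rend,row;
       PetscScalar    one = 1.0;

       PetscFunctionBegin;
       ierr = MatCreateAIJ(comm,PETSC_DECIDE,PETSC_DECIDE,N,N,5,NULL,2,NULL,A);CHKERRQ(ierr);
       ierr = MatGetOwnershipRange(*A,&rstart,&rend);CHKERRQ(ierr);
       for (row=rstart; row<rend; row++) {
         ierr = MatSetValues(*A,1,&row,1,&row,&one,INSERT_VALUES);CHKERRQ(ierr);   // unit diagonal
       }
       ierr = MatAssemblyBegin(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(*A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       PetscFunctionReturn(0);
     }
*/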
4431 
4432 /*@C
4433   MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix
4434 
4435   Not collective
4436 
4437   Input Parameter:
4438 . A - The MPIAIJ matrix
4439 
4440   Output Parameters:
4441 + Ad - The local diagonal block as a SeqAIJ matrix
4442 . Ao - The local off-diagonal block as a SeqAIJ matrix
4443 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix
4444 
4445   Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns
4446   in Ad are in [0, Nc) where Nc is the number of local columns. The columns of Ao are in [0, Nco), where Nco is
4447   the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these
4448   local column numbers to global column numbers in the original matrix.
4449 
4450   Level: intermediate
4451 
4452 .seealso: MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed(), MatCreateAIJ(), MATMPIAIJ, MATSEQAIJ
4453 @*/
4454 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A,Mat *Ad,Mat *Ao,const PetscInt *colmap[])
4455 {
4456   Mat_MPIAIJ     *a = (Mat_MPIAIJ*)A->data;
4457   PetscBool      flg;
4458   PetscErrorCode ierr;
4459 
4460   PetscFunctionBegin;
4461   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&flg);CHKERRQ(ierr);
4462   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"This function requires a MATMPIAIJ matrix as input");
4463   if (Ad)     *Ad     = a->A;
4464   if (Ao)     *Ao     = a->B;
4465   if (colmap) *colmap = a->garray;
4466   PetscFunctionReturn(0);
4467 }
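
/*
   Example (added illustration, not part of the original source): a sketch of inspecting the two
   local blocks of an MPIAIJ matrix. The helper name InspectLocalBlocks and the variable names are
   illustrative; colmap translates local column indices of Ao back to global column indices.

     static PetscErrorCode InspectLocalBlocks(Mat A)
     {
       PetscErrorCode ierr;
       Mat            Ad,Ao;
       const PetscInt *colmap;
       PetscInt       md,nd,mo,no;

       PetscFunctionBegin;
       ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&colmap);CHKERRQ(ierr);
       ierr = MatGetSize(Ad,&md,&nd);CHKERRQ(ierr);   // dimensions of the diagonal block
       ierr = MatGetSize(Ao,&mo,&no);CHKERRQ(ierr);   // dimensions of the off-diagonal block
       PetscFunctionReturn(0);
     }
*/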
4468 
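/*
   Concatenates the sequential matrix owned by each process, row-wise, into a single parallel AIJ
   matrix: on MAT_INITIAL_MATRIX a symbolic pass computes the row ownership and the preallocation,
   then a numeric pass copies each local row into the parallel matrix with MatSetValues() and
   assembles it.
*/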
4469 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
4470 {
4471   PetscErrorCode ierr;
4472   PetscInt       m,N,i,rstart,nnz,Ii;
4473   PetscInt       *indx;
4474   PetscScalar    *values;
4475 
4476   PetscFunctionBegin;
4477   ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
4478   if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
4479     PetscInt       *dnz,*onz,sum,bs,cbs;
4480 
4481     if (n == PETSC_DECIDE) {
4482       ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
4483     }
4484     /* Check sum(n) = N */
4485     ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4486     if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);
4487 
4488     ierr    = MPI_Scan(&m, &rstart,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
4489     rstart -= m;
4490 
4491     ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4492     for (i=0; i<m; i++) {
4493       ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4494       ierr = MatPreallocateSet(i+rstart,nnz,indx,dnz,onz);CHKERRQ(ierr);
4495       ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,NULL);CHKERRQ(ierr);
4496     }
4497 
4498     ierr = MatCreate(comm,outmat);CHKERRQ(ierr);
4499     ierr = MatSetSizes(*outmat,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4500     ierr = MatGetBlockSizes(inmat,&bs,&cbs);CHKERRQ(ierr);
4501     ierr = MatSetBlockSizes(*outmat,bs,cbs);CHKERRQ(ierr);
4502     ierr = MatSetType(*outmat,MATAIJ);CHKERRQ(ierr);
4503     ierr = MatSeqAIJSetPreallocation(*outmat,0,dnz);CHKERRQ(ierr);
4504     ierr = MatMPIAIJSetPreallocation(*outmat,0,dnz,0,onz);CHKERRQ(ierr);
4505     ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4506   }
4507 
4508   /* numeric phase */
4509   ierr = MatGetOwnershipRange(*outmat,&rstart,NULL);CHKERRQ(ierr);
4510   for (i=0; i<m; i++) {
4511     ierr = MatGetRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4512     Ii   = i + rstart;
4513     ierr = MatSetValues(*outmat,1,&Ii,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4514     ierr = MatRestoreRow_SeqAIJ(inmat,i,&nnz,&indx,&values);CHKERRQ(ierr);
4515   }
4516   ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4517   ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4518   PetscFunctionReturn(0);
4519 }
4520 
4521 PetscErrorCode MatFileSplit(Mat A,char *outfile)
4522 {
4523   PetscErrorCode    ierr;
4524   PetscMPIInt       rank;
4525   PetscInt          m,N,i,rstart,nnz;
4526   size_t            len;
4527   const PetscInt    *indx;
4528   PetscViewer       out;
4529   char              *name;
4530   Mat               B;
4531   const PetscScalar *values;
4532 
4533   PetscFunctionBegin;
4534   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
4535   ierr = MatGetSize(A,NULL,&N);CHKERRQ(ierr);
4536   /* Should this be the type of the diagonal block of A? */
4537   ierr = MatCreate(PETSC_COMM_SELF,&B);CHKERRQ(ierr);
4538   ierr = MatSetSizes(B,m,N,m,N);CHKERRQ(ierr);
4539   ierr = MatSetBlockSizesFromMats(B,A,A);CHKERRQ(ierr);
4540   ierr = MatSetType(B,MATSEQAIJ);CHKERRQ(ierr);
4541   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
4542   ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
4543   for (i=0; i<m; i++) {
4544     ierr = MatGetRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4545     ierr = MatSetValues(B,1,&i,nnz,indx,values,INSERT_VALUES);CHKERRQ(ierr);
4546     ierr = MatRestoreRow(A,i+rstart,&nnz,&indx,&values);CHKERRQ(ierr);
4547   }
4548   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4549   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4550 
4551   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRMPI(ierr);
4552   ierr = PetscStrlen(outfile,&len);CHKERRQ(ierr);
4553   ierr = PetscMalloc1(len+6,&name);CHKERRQ(ierr);
4554   ierr = PetscSNPrintf(name,len+6,"%s.%d",outfile,rank);CHKERRQ(ierr);
4555   ierr = PetscViewerBinaryOpen(PETSC_COMM_SELF,name,FILE_MODE_APPEND,&out);CHKERRQ(ierr);
4556   ierr = PetscFree(name);CHKERRQ(ierr);
4557   ierr = MatView(B,out);CHKERRQ(ierr);
4558   ierr = PetscViewerDestroy(&out);CHKERRQ(ierr);
4559   ierr = MatDestroy(&B);CHKERRQ(ierr);
4560   PetscFunctionReturn(0);
4561 }
4562 
4563 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
4564 {
4565   PetscErrorCode      ierr;
4566   Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;
4567 
4568   PetscFunctionBegin;
4569   if (!merge) PetscFunctionReturn(0);
4570   ierr = PetscFree(merge->id_r);CHKERRQ(ierr);
4571   ierr = PetscFree(merge->len_s);CHKERRQ(ierr);
4572   ierr = PetscFree(merge->len_r);CHKERRQ(ierr);
4573   ierr = PetscFree(merge->bi);CHKERRQ(ierr);
4574   ierr = PetscFree(merge->bj);CHKERRQ(ierr);
4575   ierr = PetscFree(merge->buf_ri[0]);CHKERRQ(ierr);
4576   ierr = PetscFree(merge->buf_ri);CHKERRQ(ierr);
4577   ierr = PetscFree(merge->buf_rj[0]);CHKERRQ(ierr);
4578   ierr = PetscFree(merge->buf_rj);CHKERRQ(ierr);
4579   ierr = PetscFree(merge->coi);CHKERRQ(ierr);
4580   ierr = PetscFree(merge->coj);CHKERRQ(ierr);
4581   ierr = PetscFree(merge->owners_co);CHKERRQ(ierr);
4582   ierr = PetscLayoutDestroy(&merge->rowmap);CHKERRQ(ierr);
4583   ierr = PetscFree(merge);CHKERRQ(ierr);
4584   PetscFunctionReturn(0);
4585 }
4586 
4587 #include <../src/mat/utils/freespace.h>
4588 #include <petscbt.h>
4589 
4590 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat,Mat mpimat)
4591 {
4592   PetscErrorCode      ierr;
4593   MPI_Comm            comm;
4594   Mat_SeqAIJ          *a  =(Mat_SeqAIJ*)seqmat->data;
4595   PetscMPIInt         size,rank,taga,*len_s;
4596   PetscInt            N=mpimat->cmap->N,i,j,*owners,*ai=a->i,*aj;
4597   PetscInt            proc,m;
4598   PetscInt            **buf_ri,**buf_rj;
4599   PetscInt            k,anzi,*bj_i,*bi,*bj,arow,bnzi,nextaj;
4600   PetscInt            nrows,**buf_ri_k,**nextrow,**nextai;
4601   MPI_Request         *s_waits,*r_waits;
4602   MPI_Status          *status;
4603   MatScalar           *aa=a->a;
4604   MatScalar           **abuf_r,*ba_i;
4605   Mat_Merge_SeqsToMPI *merge;
4606   PetscContainer      container;
4607 
4608   PetscFunctionBegin;
4609   ierr = PetscObjectGetComm((PetscObject)mpimat,&comm);CHKERRQ(ierr);
4610   ierr = PetscLogEventBegin(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4611 
4612   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4613   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4614 
4615   ierr = PetscObjectQuery((PetscObject)mpimat,"MatMergeSeqsToMPI",(PetscObject*)&container);CHKERRQ(ierr);
4616   if (!container) SETERRQ(PetscObjectComm((PetscObject)mpimat),PETSC_ERR_PLIB,"Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
4617   ierr = PetscContainerGetPointer(container,(void**)&merge);CHKERRQ(ierr);
4618 
4619   bi     = merge->bi;
4620   bj     = merge->bj;
4621   buf_ri = merge->buf_ri;
4622   buf_rj = merge->buf_rj;
4623 
4624   ierr   = PetscMalloc1(size,&status);CHKERRQ(ierr);
4625   owners = merge->rowmap->range;
4626   len_s  = merge->len_s;
4627 
4628   /* send and recv matrix values */
4629   /*-----------------------------*/
4630   ierr = PetscObjectGetNewTag((PetscObject)mpimat,&taga);CHKERRQ(ierr);
4631   ierr = PetscPostIrecvScalar(comm,taga,merge->nrecv,merge->id_r,merge->len_r,&abuf_r,&r_waits);CHKERRQ(ierr);
4632 
4633   ierr = PetscMalloc1(merge->nsend+1,&s_waits);CHKERRQ(ierr);
4634   for (proc=0,k=0; proc<size; proc++) {
4635     if (!len_s[proc]) continue;
4636     i    = owners[proc];
4637     ierr = MPI_Isend(aa+ai[i],len_s[proc],MPIU_MATSCALAR,proc,taga,comm,s_waits+k);CHKERRMPI(ierr);
4638     k++;
4639   }
4640 
4641   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,r_waits,status);CHKERRMPI(ierr);}
4642   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,s_waits,status);CHKERRMPI(ierr);}
4643   ierr = PetscFree(status);CHKERRQ(ierr);
4644 
4645   ierr = PetscFree(s_waits);CHKERRQ(ierr);
4646   ierr = PetscFree(r_waits);CHKERRQ(ierr);
4647 
4648   /* insert mat values of mpimat */
4649   /*----------------------------*/
4650   ierr = PetscMalloc1(N,&ba_i);CHKERRQ(ierr);
4651   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4652 
4653   for (k=0; k<merge->nrecv; k++) {
4654     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4655     nrows       = *(buf_ri_k[k]);
4656     nextrow[k]  = buf_ri_k[k]+1;  /* next row number of k-th recved i-structure */
4657     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4658   }
4659 
4660   /* set values of ba */
4661   m = merge->rowmap->n;
4662   for (i=0; i<m; i++) {
4663     arow = owners[rank] + i;
4664     bj_i = bj+bi[i];  /* col indices of the i-th row of mpimat */
4665     bnzi = bi[i+1] - bi[i];
4666     ierr = PetscArrayzero(ba_i,bnzi);CHKERRQ(ierr);
4667 
4668     /* add local non-zero vals of this proc's seqmat into ba */
4669     anzi   = ai[arow+1] - ai[arow];
4670     aj     = a->j + ai[arow];
4671     aa     = a->a + ai[arow];
4672     nextaj = 0;
4673     for (j=0; nextaj<anzi; j++) {
4674       if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4675         ba_i[j] += aa[nextaj++];
4676       }
4677     }
4678 
4679     /* add received vals into ba */
4680     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4681       /* i-th row */
4682       if (i == *nextrow[k]) {
4683         anzi   = *(nextai[k]+1) - *nextai[k];
4684         aj     = buf_rj[k] + *(nextai[k]);
4685         aa     = abuf_r[k] + *(nextai[k]);
4686         nextaj = 0;
4687         for (j=0; nextaj<anzi; j++) {
4688           if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
4689             ba_i[j] += aa[nextaj++];
4690           }
4691         }
4692         nextrow[k]++; nextai[k]++;
4693       }
4694     }
4695     ierr = MatSetValues(mpimat,1,&arow,bnzi,bj_i,ba_i,INSERT_VALUES);CHKERRQ(ierr);
4696   }
4697   ierr = MatAssemblyBegin(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4698   ierr = MatAssemblyEnd(mpimat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
4699 
4700   ierr = PetscFree(abuf_r[0]);CHKERRQ(ierr);
4701   ierr = PetscFree(abuf_r);CHKERRQ(ierr);
4702   ierr = PetscFree(ba_i);CHKERRQ(ierr);
4703   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4704   ierr = PetscLogEventEnd(MAT_Seqstompinum,seqmat,0,0,0);CHKERRQ(ierr);
4705   PetscFunctionReturn(0);
4706 }
4707 
4708 PetscErrorCode  MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,Mat *mpimat)
4709 {
4710   PetscErrorCode      ierr;
4711   Mat                 B_mpi;
4712   Mat_SeqAIJ          *a=(Mat_SeqAIJ*)seqmat->data;
4713   PetscMPIInt         size,rank,tagi,tagj,*len_s,*len_si,*len_ri;
4714   PetscInt            **buf_rj,**buf_ri,**buf_ri_k;
4715   PetscInt            M=seqmat->rmap->n,N=seqmat->cmap->n,i,*owners,*ai=a->i,*aj=a->j;
4716   PetscInt            len,proc,*dnz,*onz,bs,cbs;
4717   PetscInt            k,anzi,*bi,*bj,*lnk,nlnk,arow,bnzi,nspacedouble=0;
4718   PetscInt            nrows,*buf_s,*buf_si,*buf_si_i,**nextrow,**nextai;
4719   MPI_Request         *si_waits,*sj_waits,*ri_waits,*rj_waits;
4720   MPI_Status          *status;
4721   PetscFreeSpaceList  free_space=NULL,current_space=NULL;
4722   PetscBT             lnkbt;
4723   Mat_Merge_SeqsToMPI *merge;
4724   PetscContainer      container;
4725 
4726   PetscFunctionBegin;
4727   ierr = PetscLogEventBegin(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4728 
4729   /* make sure it is a PETSc comm */
4730   ierr = PetscCommDuplicate(comm,&comm,NULL);CHKERRQ(ierr);
4731   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4732   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
4733 
4734   ierr = PetscNew(&merge);CHKERRQ(ierr);
4735   ierr = PetscMalloc1(size,&status);CHKERRQ(ierr);
4736 
4737   /* determine row ownership */
4738   /*---------------------------------------------------------*/
4739   ierr = PetscLayoutCreate(comm,&merge->rowmap);CHKERRQ(ierr);
4740   ierr = PetscLayoutSetLocalSize(merge->rowmap,m);CHKERRQ(ierr);
4741   ierr = PetscLayoutSetSize(merge->rowmap,M);CHKERRQ(ierr);
4742   ierr = PetscLayoutSetBlockSize(merge->rowmap,1);CHKERRQ(ierr);
4743   ierr = PetscLayoutSetUp(merge->rowmap);CHKERRQ(ierr);
4744   ierr = PetscMalloc1(size,&len_si);CHKERRQ(ierr);
4745   ierr = PetscMalloc1(size,&merge->len_s);CHKERRQ(ierr);
4746 
4747   m      = merge->rowmap->n;
4748   owners = merge->rowmap->range;
4749 
4750   /* determine the number of messages to send, their lengths */
4751   /*---------------------------------------------------------*/
4752   len_s = merge->len_s;
4753 
4754   len          = 0; /* length of buf_si[] */
4755   merge->nsend = 0;
4756   for (proc=0; proc<size; proc++) {
4757     len_si[proc] = 0;
4758     if (proc == rank) {
4759       len_s[proc] = 0;
4760     } else {
4761       len_si[proc] = owners[proc+1] - owners[proc] + 1;
4762       len_s[proc]  = ai[owners[proc+1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */
4763     }
4764     if (len_s[proc]) {
4765       merge->nsend++;
4766       nrows = 0;
4767       for (i=owners[proc]; i<owners[proc+1]; i++) {
4768         if (ai[i+1] > ai[i]) nrows++;
4769       }
4770       len_si[proc] = 2*(nrows+1);
4771       len         += len_si[proc];
4772     }
4773   }
4774 
4775   /* determine the number and length of messages to receive for ij-structure */
4776   /*-------------------------------------------------------------------------*/
4777   ierr = PetscGatherNumberOfMessages(comm,NULL,len_s,&merge->nrecv);CHKERRQ(ierr);
4778   ierr = PetscGatherMessageLengths2(comm,merge->nsend,merge->nrecv,len_s,len_si,&merge->id_r,&merge->len_r,&len_ri);CHKERRQ(ierr);
4779 
4780   /* post the Irecv of j-structure */
4781   /*-------------------------------*/
4782   ierr = PetscCommGetNewTag(comm,&tagj);CHKERRQ(ierr);
4783   ierr = PetscPostIrecvInt(comm,tagj,merge->nrecv,merge->id_r,merge->len_r,&buf_rj,&rj_waits);CHKERRQ(ierr);
4784 
4785   /* post the Isend of j-structure */
4786   /*--------------------------------*/
4787   ierr = PetscMalloc2(merge->nsend,&si_waits,merge->nsend,&sj_waits);CHKERRQ(ierr);
4788 
4789   for (proc=0, k=0; proc<size; proc++) {
4790     if (!len_s[proc]) continue;
4791     i    = owners[proc];
4792     ierr = MPI_Isend(aj+ai[i],len_s[proc],MPIU_INT,proc,tagj,comm,sj_waits+k);CHKERRMPI(ierr);
4793     k++;
4794   }
4795 
4796   /* receives and sends of j-structure are complete */
4797   /*------------------------------------------------*/
4798   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,rj_waits,status);CHKERRMPI(ierr);}
4799   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,sj_waits,status);CHKERRMPI(ierr);}
4800 
4801   /* send and recv i-structure */
4802   /*---------------------------*/
4803   ierr = PetscCommGetNewTag(comm,&tagi);CHKERRQ(ierr);
4804   ierr = PetscPostIrecvInt(comm,tagi,merge->nrecv,merge->id_r,len_ri,&buf_ri,&ri_waits);CHKERRQ(ierr);
4805 
4806   ierr   = PetscMalloc1(len+1,&buf_s);CHKERRQ(ierr);
4807   buf_si = buf_s;  /* points to the beginning of k-th msg to be sent */
4808   for (proc=0,k=0; proc<size; proc++) {
4809     if (!len_s[proc]) continue;
4810     /* form outgoing message for i-structure:
4811          buf_si[0]:                 nrows to be sent
4812                [1:nrows]:           row index (global)
4813                [nrows+1:2*nrows+1]: i-structure index
4814     */
4815     /*-------------------------------------------*/
4816     nrows       = len_si[proc]/2 - 1;
4817     buf_si_i    = buf_si + nrows+1;
4818     buf_si[0]   = nrows;
4819     buf_si_i[0] = 0;
4820     nrows       = 0;
4821     for (i=owners[proc]; i<owners[proc+1]; i++) {
4822       anzi = ai[i+1] - ai[i];
4823       if (anzi) {
4824         buf_si_i[nrows+1] = buf_si_i[nrows] + anzi; /* i-structure */
4825         buf_si[nrows+1]   = i-owners[proc]; /* local row index */
4826         nrows++;
4827       }
4828     }
4829     ierr = MPI_Isend(buf_si,len_si[proc],MPIU_INT,proc,tagi,comm,si_waits+k);CHKERRMPI(ierr);
4830     k++;
4831     buf_si += len_si[proc];
4832   }
4833 
4834   if (merge->nrecv) {ierr = MPI_Waitall(merge->nrecv,ri_waits,status);CHKERRMPI(ierr);}
4835   if (merge->nsend) {ierr = MPI_Waitall(merge->nsend,si_waits,status);CHKERRMPI(ierr);}
4836 
4837   ierr = PetscInfo2(seqmat,"nsend: %D, nrecv: %D\n",merge->nsend,merge->nrecv);CHKERRQ(ierr);
4838   for (i=0; i<merge->nrecv; i++) {
4839     ierr = PetscInfo3(seqmat,"recv len_ri=%D, len_rj=%D from [%D]\n",len_ri[i],merge->len_r[i],merge->id_r[i]);CHKERRQ(ierr);
4840   }
4841 
4842   ierr = PetscFree(len_si);CHKERRQ(ierr);
4843   ierr = PetscFree(len_ri);CHKERRQ(ierr);
4844   ierr = PetscFree(rj_waits);CHKERRQ(ierr);
4845   ierr = PetscFree2(si_waits,sj_waits);CHKERRQ(ierr);
4846   ierr = PetscFree(ri_waits);CHKERRQ(ierr);
4847   ierr = PetscFree(buf_s);CHKERRQ(ierr);
4848   ierr = PetscFree(status);CHKERRQ(ierr);
4849 
4850   /* compute a local seq matrix in each processor */
4851   /*----------------------------------------------*/
4852   /* allocate bi array and free space for accumulating nonzero column info */
4853   ierr  = PetscMalloc1(m+1,&bi);CHKERRQ(ierr);
4854   bi[0] = 0;
4855 
4856   /* create and initialize a linked list */
4857   nlnk = N+1;
4858   ierr = PetscLLCreate(N,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4859 
4860   /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
4861   len  = ai[owners[rank+1]] - ai[owners[rank]];
4862   ierr = PetscFreeSpaceGet(PetscIntMultTruncate(2,len)+1,&free_space);CHKERRQ(ierr);
4863 
4864   current_space = free_space;
4865 
4866   /* determine symbolic info for each local row */
4867   ierr = PetscMalloc3(merge->nrecv,&buf_ri_k,merge->nrecv,&nextrow,merge->nrecv,&nextai);CHKERRQ(ierr);
4868 
4869   for (k=0; k<merge->nrecv; k++) {
4870     buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
4871     nrows       = *buf_ri_k[k];
4872     nextrow[k]  = buf_ri_k[k] + 1;  /* next row number of k-th recved i-structure */
4873     nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure  */
4874   }
4875 
4876   ierr = MatPreallocateInitialize(comm,m,n,dnz,onz);CHKERRQ(ierr);
4877   len  = 0;
4878   for (i=0; i<m; i++) {
4879     bnzi = 0;
4880     /* add local non-zero cols of this proc's seqmat into lnk */
4881     arow  = owners[rank] + i;
4882     anzi  = ai[arow+1] - ai[arow];
4883     aj    = a->j + ai[arow];
4884     ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4885     bnzi += nlnk;
4886     /* add received col data into lnk */
4887     for (k=0; k<merge->nrecv; k++) { /* k-th received message */
4888       if (i == *nextrow[k]) { /* i-th row */
4889         anzi  = *(nextai[k]+1) - *nextai[k];
4890         aj    = buf_rj[k] + *nextai[k];
4891         ierr  = PetscLLAddSorted(anzi,aj,N,nlnk,lnk,lnkbt);CHKERRQ(ierr);
4892         bnzi += nlnk;
4893         nextrow[k]++; nextai[k]++;
4894       }
4895     }
4896     if (len < bnzi) len = bnzi;  /* =max(bnzi) */
4897 
4898     /* if free space is not available, make more free space */
4899     if (current_space->local_remaining<bnzi) {
4900       ierr = PetscFreeSpaceGet(PetscIntSumTruncate(bnzi,current_space->total_array_size),&current_space);CHKERRQ(ierr);
4901       nspacedouble++;
4902     }
4903     /* copy data into free space, then initialize lnk */
4904     ierr = PetscLLClean(N,N,bnzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
4905     ierr = MatPreallocateSet(i+owners[rank],bnzi,current_space->array,dnz,onz);CHKERRQ(ierr);
4906 
4907     current_space->array           += bnzi;
4908     current_space->local_used      += bnzi;
4909     current_space->local_remaining -= bnzi;
4910 
4911     bi[i+1] = bi[i] + bnzi;
4912   }
4913 
4914   ierr = PetscFree3(buf_ri_k,nextrow,nextai);CHKERRQ(ierr);
4915 
4916   ierr = PetscMalloc1(bi[m]+1,&bj);CHKERRQ(ierr);
4917   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
4918   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
4919 
4920   /* create symbolic parallel matrix B_mpi */
4921   /*---------------------------------------*/
4922   ierr = MatGetBlockSizes(seqmat,&bs,&cbs);CHKERRQ(ierr);
4923   ierr = MatCreate(comm,&B_mpi);CHKERRQ(ierr);
4924   if (n==PETSC_DECIDE) {
4925     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,N);CHKERRQ(ierr);
4926   } else {
4927     ierr = MatSetSizes(B_mpi,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4928   }
4929   ierr = MatSetBlockSizes(B_mpi,bs,cbs);CHKERRQ(ierr);
4930   ierr = MatSetType(B_mpi,MATMPIAIJ);CHKERRQ(ierr);
4931   ierr = MatMPIAIJSetPreallocation(B_mpi,0,dnz,0,onz);CHKERRQ(ierr);
4932   ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
4933   ierr = MatSetOption(B_mpi,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
4934 
4935   /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
4936   B_mpi->assembled  = PETSC_FALSE;
4937   merge->bi         = bi;
4938   merge->bj         = bj;
4939   merge->buf_ri     = buf_ri;
4940   merge->buf_rj     = buf_rj;
4941   merge->coi        = NULL;
4942   merge->coj        = NULL;
4943   merge->owners_co  = NULL;
4944 
4945   ierr = PetscCommDestroy(&comm);CHKERRQ(ierr);
4946 
4947   /* attach the supporting struct to B_mpi for reuse */
4948   ierr    = PetscContainerCreate(PETSC_COMM_SELF,&container);CHKERRQ(ierr);
4949   ierr    = PetscContainerSetPointer(container,merge);CHKERRQ(ierr);
4950   ierr    = PetscContainerSetUserDestroy(container,MatDestroy_MPIAIJ_SeqsToMPI);CHKERRQ(ierr);
4951   ierr    = PetscObjectCompose((PetscObject)B_mpi,"MatMergeSeqsToMPI",(PetscObject)container);CHKERRQ(ierr);
4952   ierr    = PetscContainerDestroy(&container);CHKERRQ(ierr);
4953   *mpimat = B_mpi;
4954 
4955   ierr = PetscLogEventEnd(MAT_Seqstompisym,seqmat,0,0,0);CHKERRQ(ierr);
4956   PetscFunctionReturn(0);
4957 }
4958 
4959 /*@C
4960       MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential
4961                  matrices from each processor
4962 
4963     Collective
4964 
4965    Input Parameters:
4966 +    comm - the communicator the parallel matrix will live on
4967 .    seqmat - the input sequential matrix
4968 .    m - number of local rows (or PETSC_DECIDE)
4969 .    n - number of local columns (or PETSC_DECIDE)
4970 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
4971 
4972    Output Parameter:
4973 .    mpimat - the parallel matrix generated
4974 
4975     Level: advanced
4976 
4977    Notes:
4978      The dimensions of the sequential matrix in each processor MUST be the same.
4979      The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
4980      destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat.
4981 @*/
4982 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm,Mat seqmat,PetscInt m,PetscInt n,MatReuse scall,Mat *mpimat)
4983 {
4984   PetscErrorCode ierr;
4985   PetscMPIInt    size;
4986 
4987   PetscFunctionBegin;
4988   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
4989   if (size == 1) {
4990     ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4991     if (scall == MAT_INITIAL_MATRIX) {
4992       ierr = MatDuplicate(seqmat,MAT_COPY_VALUES,mpimat);CHKERRQ(ierr);
4993     } else {
4994       ierr = MatCopy(seqmat,*mpimat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
4995     }
4996     ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
4997     PetscFunctionReturn(0);
4998   }
4999   ierr = PetscLogEventBegin(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5000   if (scall == MAT_INITIAL_MATRIX) {
5001     ierr = MatCreateMPIAIJSumSeqAIJSymbolic(comm,seqmat,m,n,mpimat);CHKERRQ(ierr);
5002   }
5003   ierr = MatCreateMPIAIJSumSeqAIJNumeric(seqmat,*mpimat);CHKERRQ(ierr);
5004   ierr = PetscLogEventEnd(MAT_Seqstompi,seqmat,0,0,0);CHKERRQ(ierr);
5005   PetscFunctionReturn(0);
5006 }
5007 
5008 /*@
5009      MatMPIAIJGetLocalMat - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5010           mlocal rows and n columns, where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained
5011           with MatGetSize()
5012 
5013     Not Collective
5014 
5015    Input Parameters:
5016 +    A - the matrix
5017 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5018 
5019    Output Parameter:
5020 .    A_loc - the local sequential matrix generated
5021 
5022     Level: developer
5023 
5024    Notes:
5025      When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A.
5026      If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called.
5027      This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely
5028      modify the values of the returned A_loc.
5029 
5030 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMatCondensed(), MatMPIAIJGetLocalMatMerge()
5031 @*/
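/*
   A minimal usage sketch (not part of the PETSc man page; names are illustrative): extract the local
   rows of a parallel matrix once, then refresh the values in place after A changes with the same
   nonzero pattern.

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&A_loc);CHKERRQ(ierr);
     // ... use A_loc; later, after the values of A change ...
     ierr = MatMPIAIJGetLocalMat(A,MAT_REUSE_MATRIX,&A_loc);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/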
5032 PetscErrorCode MatMPIAIJGetLocalMat(Mat A,MatReuse scall,Mat *A_loc)
5033 {
5034   PetscErrorCode    ierr;
5035   Mat_MPIAIJ        *mpimat=(Mat_MPIAIJ*)A->data;
5036   Mat_SeqAIJ        *mat,*a,*b;
5037   PetscInt          *ai,*aj,*bi,*bj,*cmap=mpimat->garray;
5038   const PetscScalar *aa,*ba,*aav,*bav;
5039   PetscScalar       *ca,*cam;
5040   PetscMPIInt       size;
5041   PetscInt          am=A->rmap->n,i,j,k,cstart=A->cmap->rstart;
5042   PetscInt          *ci,*cj,col,ncols_d,ncols_o,jo;
5043   PetscBool         match;
5044 
5045   PetscFunctionBegin;
5046   ierr = PetscStrbeginswith(((PetscObject)A)->type_name,MATMPIAIJ,&match);CHKERRQ(ierr);
5047   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5048   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5049   if (size == 1) {
5050     if (scall == MAT_INITIAL_MATRIX) {
5051       ierr = PetscObjectReference((PetscObject)mpimat->A);CHKERRQ(ierr);
5052       *A_loc = mpimat->A;
5053     } else if (scall == MAT_REUSE_MATRIX) {
5054       ierr = MatCopy(mpimat->A,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5055     }
5056     PetscFunctionReturn(0);
5057   }
5058 
5059   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5060   a = (Mat_SeqAIJ*)(mpimat->A)->data;
5061   b = (Mat_SeqAIJ*)(mpimat->B)->data;
5062   ai = a->i; aj = a->j; bi = b->i; bj = b->j;
5063   ierr = MatSeqAIJGetArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
5064   ierr = MatSeqAIJGetArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
5065   aa   = aav;
5066   ba   = bav;
5067   if (scall == MAT_INITIAL_MATRIX) {
5068     ierr  = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5069     ci[0] = 0;
5070     for (i=0; i<am; i++) {
5071       ci[i+1] = ci[i] + (ai[i+1] - ai[i]) + (bi[i+1] - bi[i]);
5072     }
5073     ierr = PetscMalloc1(1+ci[am],&cj);CHKERRQ(ierr);
5074     ierr = PetscMalloc1(1+ci[am],&ca);CHKERRQ(ierr);
5075     k    = 0;
5076     for (i=0; i<am; i++) {
5077       ncols_o = bi[i+1] - bi[i];
5078       ncols_d = ai[i+1] - ai[i];
5079       /* off-diagonal portion of A */
5080       for (jo=0; jo<ncols_o; jo++) {
5081         col = cmap[*bj];
5082         if (col >= cstart) break;
5083         cj[k]   = col; bj++;
5084         ca[k++] = *ba++;
5085       }
5086       /* diagonal portion of A */
5087       for (j=0; j<ncols_d; j++) {
5088         cj[k]   = cstart + *aj++;
5089         ca[k++] = *aa++;
5090       }
5091       /* off-diagonal portion of A */
5092       for (j=jo; j<ncols_o; j++) {
5093         cj[k]   = cmap[*bj++];
5094         ca[k++] = *ba++;
5095       }
5096     }
5097     /* put together the new matrix */
5098     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,A->cmap->N,ci,cj,ca,A_loc);CHKERRQ(ierr);
5099     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5100     /* Since these are PETSc arrays, change flags to free them as necessary. */
5101     mat          = (Mat_SeqAIJ*)(*A_loc)->data;
5102     mat->free_a  = PETSC_TRUE;
5103     mat->free_ij = PETSC_TRUE;
5104     mat->nonew   = 0;
5105   } else if (scall == MAT_REUSE_MATRIX) {
5106     mat=(Mat_SeqAIJ*)(*A_loc)->data;
5107 #if defined(PETSC_HAVE_DEVICE)
5108     (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU;
5109 #endif
5110     ci = mat->i; cj = mat->j; cam = mat->a;
5111     for (i=0; i<am; i++) {
5112       /* off-diagonal portion of A */
5113       ncols_o = bi[i+1] - bi[i];
5114       for (jo=0; jo<ncols_o; jo++) {
5115         col = cmap[*bj];
5116         if (col >= cstart) break;
5117         *cam++ = *ba++; bj++;
5118       }
5119       /* diagonal portion of A */
5120       ncols_d = ai[i+1] - ai[i];
5121       for (j=0; j<ncols_d; j++) *cam++ = *aa++;
5122       /* off-diagonal portion of A */
5123       for (j=jo; j<ncols_o; j++) {
5124         *cam++ = *ba++; bj++;
5125       }
5126     }
5127   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5128   ierr = MatSeqAIJRestoreArrayRead(mpimat->A,&aav);CHKERRQ(ierr);
5129   ierr = MatSeqAIJRestoreArrayRead(mpimat->B,&bav);CHKERRQ(ierr);
5130   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5131   PetscFunctionReturn(0);
5132 }
5133 
5134 /*@
5135      MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with
5136           mlocal rows and n columns, where n is the sum of the number of columns of the diagonal and off-diagonal parts
5137 
5138     Not Collective
5139 
5140    Input Parameters:
5141 +    A - the matrix
5142 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5143 
5144    Output Parameters:
5145 +    glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL)
5146 -    A_loc - the local sequential matrix generated
5147 
5148     Level: developer
5149 
5150    Notes:
5151      This differs from MatMPIAIJGetLocalMat() in that the first columns of the returned matrix are those associated with the diagonal part, followed by those associated with the off-diagonal part (in its local ordering)
5152 
5153 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat(), MatMPIAIJGetLocalMatCondensed()
5154 
5155 @*/
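/*
   A minimal usage sketch (not part of the PETSc man page; names are illustrative): the returned glob IS
   translates the columns of the merged local matrix (diagonal-part columns first, then off-diagonal-part
   columns) back to global column indices.

     Mat A_loc;
     IS  glob;
     ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&A_loc);CHKERRQ(ierr);
     // ... map a local column of A_loc to a global column through the entries of glob ...
     ierr = ISDestroy(&glob);CHKERRQ(ierr);
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/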
5156 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A,MatReuse scall,IS *glob,Mat *A_loc)
5157 {
5158   PetscErrorCode ierr;
5159   Mat            Ao,Ad;
5160   const PetscInt *cmap;
5161   PetscMPIInt    size;
5162   PetscErrorCode (*f)(Mat,MatReuse,IS*,Mat*);
5163 
5164   PetscFunctionBegin;
5165   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&cmap);CHKERRQ(ierr);
5166   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
5167   if (size == 1) {
5168     if (scall == MAT_INITIAL_MATRIX) {
5169       ierr = PetscObjectReference((PetscObject)Ad);CHKERRQ(ierr);
5170       *A_loc = Ad;
5171     } else if (scall == MAT_REUSE_MATRIX) {
5172       ierr = MatCopy(Ad,*A_loc,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
5173     }
5174     if (glob) { ierr = ISCreateStride(PetscObjectComm((PetscObject)Ad),Ad->cmap->n,Ad->cmap->rstart,1,glob);CHKERRQ(ierr); }
5175     PetscFunctionReturn(0);
5176   }
5177   ierr = PetscObjectQueryFunction((PetscObject)A,"MatMPIAIJGetLocalMatMerge_C",&f);CHKERRQ(ierr);
5178   ierr = PetscLogEventBegin(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5179   if (f) {
5180     ierr = (*f)(A,scall,glob,A_loc);CHKERRQ(ierr);
5181   } else {
5182     Mat_SeqAIJ        *a = (Mat_SeqAIJ*)Ad->data;
5183     Mat_SeqAIJ        *b = (Mat_SeqAIJ*)Ao->data;
5184     Mat_SeqAIJ        *c;
5185     PetscInt          *ai = a->i, *aj = a->j;
5186     PetscInt          *bi = b->i, *bj = b->j;
5187     PetscInt          *ci,*cj;
5188     const PetscScalar *aa,*ba;
5189     PetscScalar       *ca;
5190     PetscInt          i,j,am,dn,on;
5191 
5192     ierr = MatGetLocalSize(Ad,&am,&dn);CHKERRQ(ierr);
5193     ierr = MatGetLocalSize(Ao,NULL,&on);CHKERRQ(ierr);
5194     ierr = MatSeqAIJGetArrayRead(Ad,&aa);CHKERRQ(ierr);
5195     ierr = MatSeqAIJGetArrayRead(Ao,&ba);CHKERRQ(ierr);
5196     if (scall == MAT_INITIAL_MATRIX) {
5197       PetscInt k;
5198       ierr = PetscMalloc1(1+am,&ci);CHKERRQ(ierr);
5199       ierr = PetscMalloc1(ai[am]+bi[am],&cj);CHKERRQ(ierr);
5200       ierr = PetscMalloc1(ai[am]+bi[am],&ca);CHKERRQ(ierr);
5201       ci[0] = 0;
5202       for (i=0,k=0; i<am; i++) {
5203         const PetscInt ncols_o = bi[i+1] - bi[i];
5204         const PetscInt ncols_d = ai[i+1] - ai[i];
5205         ci[i+1] = ci[i] + ncols_o + ncols_d;
5206         /* diagonal portion of A */
5207         for (j=0; j<ncols_d; j++,k++) {
5208           cj[k] = *aj++;
5209           ca[k] = *aa++;
5210         }
5211         /* off-diagonal portion of A */
5212         for (j=0; j<ncols_o; j++,k++) {
5213           cj[k] = dn + *bj++;
5214           ca[k] = *ba++;
5215         }
5216       }
5217       /* put together the new matrix */
5218       ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,am,dn+on,ci,cj,ca,A_loc);CHKERRQ(ierr);
5219       /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5220       /* Since these are PETSc arrays, change flags to free them as necessary. */
5221       c          = (Mat_SeqAIJ*)(*A_loc)->data;
5222       c->free_a  = PETSC_TRUE;
5223       c->free_ij = PETSC_TRUE;
5224       c->nonew   = 0;
5225       ierr = MatSetType(*A_loc,((PetscObject)Ad)->type_name);CHKERRQ(ierr);
5226     } else if (scall == MAT_REUSE_MATRIX) {
5227 #if defined(PETSC_HAVE_DEVICE)
5228       (*A_loc)->offloadmask = PETSC_OFFLOAD_CPU;
5229 #endif
5230       c  = (Mat_SeqAIJ*)(*A_loc)->data;
5231       ca = c->a;
5232       for (i=0; i<am; i++) {
5233         const PetscInt ncols_d = ai[i+1] - ai[i];
5234         const PetscInt ncols_o = bi[i+1] - bi[i];
5235         /* diagonal portion of A */
5236         for (j=0; j<ncols_d; j++) *ca++ = *aa++;
5237         /* off-diagonal portion of A */
5238         for (j=0; j<ncols_o; j++) *ca++ = *ba++;
5239       }
5240     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid MatReuse %d",(int)scall);
5241     ierr = MatSeqAIJRestoreArrayRead(Ad,&aa);CHKERRQ(ierr);
5242     ierr = MatSeqAIJRestoreArrayRead(Ao,&ba);CHKERRQ(ierr);
5243     if (glob) {
5244       PetscInt cst, *gidx;
5245 
5246       ierr = MatGetOwnershipRangeColumn(A,&cst,NULL);CHKERRQ(ierr);
5247       ierr = PetscMalloc1(dn+on,&gidx);CHKERRQ(ierr);
5248       for (i=0; i<dn; i++) gidx[i]    = cst + i;
5249       for (i=0; i<on; i++) gidx[i+dn] = cmap[i];
5250       ierr = ISCreateGeneral(PetscObjectComm((PetscObject)Ad),dn+on,gidx,PETSC_OWN_POINTER,glob);CHKERRQ(ierr);
5251     }
5252   }
5253   ierr = PetscLogEventEnd(MAT_Getlocalmat,A,0,0,0);CHKERRQ(ierr);
5254   PetscFunctionReturn(0);
5255 }
5256 
5257 /*@C
5258      MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from a MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns
5259 
5260     Not Collective
5261 
5262    Input Parameters:
5263 +    A - the matrix
5264 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5265 -    row, col - index sets of rows and columns to extract (or NULL)
5266 
5267    Output Parameter:
5268 .    A_loc - the local sequential matrix generated
5269 
5270     Level: developer
5271 
5272 .seealso: MatGetOwnershipRange(), MatMPIAIJGetLocalMat()
5273 
5274 @*/
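/*
   A minimal usage sketch (not part of the PETSc man page; names are illustrative): pass NULL for the row
   and column index sets to take all local rows and all columns that actually contain a nonzero.

     Mat A_loc;
     ierr = MatMPIAIJGetLocalMatCondensed(A,MAT_INITIAL_MATRIX,NULL,NULL,&A_loc);CHKERRQ(ierr);
     // ... call again with MAT_REUSE_MATRIX once the values of A change with the same pattern ...
     ierr = MatDestroy(&A_loc);CHKERRQ(ierr);
*/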
5275 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A,MatReuse scall,IS *row,IS *col,Mat *A_loc)
5276 {
5277   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5278   PetscErrorCode ierr;
5279   PetscInt       i,start,end,ncols,nzA,nzB,*cmap,imark,*idx;
5280   IS             isrowa,iscola;
5281   Mat            *aloc;
5282   PetscBool      match;
5283 
5284   PetscFunctionBegin;
5285   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&match);CHKERRQ(ierr);
5286   if (!match) SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP,"Requires MATMPIAIJ matrix as input");
5287   ierr = PetscLogEventBegin(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5288   if (!row) {
5289     start = A->rmap->rstart; end = A->rmap->rend;
5290     ierr  = ISCreateStride(PETSC_COMM_SELF,end-start,start,1,&isrowa);CHKERRQ(ierr);
5291   } else {
5292     isrowa = *row;
5293   }
5294   if (!col) {
5295     start = A->cmap->rstart;
5296     cmap  = a->garray;
5297     nzA   = a->A->cmap->n;
5298     nzB   = a->B->cmap->n;
5299     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5300     ncols = 0;
5301     for (i=0; i<nzB; i++) {
5302       if (cmap[i] < start) idx[ncols++] = cmap[i];
5303       else break;
5304     }
5305     imark = i;
5306     for (i=0; i<nzA; i++) idx[ncols++] = start + i;
5307     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i];
5308     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&iscola);CHKERRQ(ierr);
5309   } else {
5310     iscola = *col;
5311   }
5312   if (scall != MAT_INITIAL_MATRIX) {
5313     ierr    = PetscMalloc1(1,&aloc);CHKERRQ(ierr);
5314     aloc[0] = *A_loc;
5315   }
5316   ierr = MatCreateSubMatrices(A,1,&isrowa,&iscola,scall,&aloc);CHKERRQ(ierr);
5317   if (!col) { /* attach global id of condensed columns */
5318     ierr = PetscObjectCompose((PetscObject)aloc[0],"_petsc_GetLocalMatCondensed_iscol",(PetscObject)iscola);CHKERRQ(ierr);
5319   }
5320   *A_loc = aloc[0];
5321   ierr   = PetscFree(aloc);CHKERRQ(ierr);
5322   if (!row) {
5323     ierr = ISDestroy(&isrowa);CHKERRQ(ierr);
5324   }
5325   if (!col) {
5326     ierr = ISDestroy(&iscola);CHKERRQ(ierr);
5327   }
5328   ierr = PetscLogEventEnd(MAT_Getlocalmatcondensed,A,0,0,0);CHKERRQ(ierr);
5329   PetscFunctionReturn(0);
5330 }
5331 
5332 /*
5333  * Create a sequential AIJ matrix based on row indices; all the columns of a row are extracted once that row is matched.
5334  * Rows can be local or remote. The routine is designed to be scalable in memory so that nothing is based
5335  * on a global size.
5336  * */
5337 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P,IS rows,Mat *P_oth)
5338 {
5339   Mat_MPIAIJ               *p=(Mat_MPIAIJ*)P->data;
5340   Mat_SeqAIJ               *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data,*p_oth;
5341   PetscInt                 plocalsize,nrows,*ilocal,*oilocal,i,lidx,*nrcols,*nlcols,ncol;
5342   PetscMPIInt              owner;
5343   PetscSFNode              *iremote,*oiremote;
5344   const PetscInt           *lrowindices;
5345   PetscErrorCode           ierr;
5346   PetscSF                  sf,osf;
5347   PetscInt                 pcstart,*roffsets,*loffsets,*pnnz,j;
5348   PetscInt                 ontotalcols,dntotalcols,ntotalcols,nout;
5349   MPI_Comm                 comm;
5350   ISLocalToGlobalMapping   mapping;
5351 
5352   PetscFunctionBegin;
5353   ierr = PetscObjectGetComm((PetscObject)P,&comm);CHKERRQ(ierr);
5354   /* plocalsize is the number of roots
5355    * nrows is the number of leaves
5356    * */
5357   ierr = MatGetLocalSize(P,&plocalsize,NULL);CHKERRQ(ierr);
5358   ierr = ISGetLocalSize(rows,&nrows);CHKERRQ(ierr);
5359   ierr = PetscCalloc1(nrows,&iremote);CHKERRQ(ierr);
5360   ierr = ISGetIndices(rows,&lrowindices);CHKERRQ(ierr);
5361   for (i=0;i<nrows;i++) {
5362     /* Find a remote index and an owner for a row
5363      * The row could be local or remote
5364      * */
5365     owner = 0;
5366     lidx  = 0;
5367     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,&lidx);CHKERRQ(ierr);
5368     iremote[i].index = lidx;
5369     iremote[i].rank  = owner;
5370   }
5371   /* Create SF to communicate how many nonzero columns for each row */
5372   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5373   /* SF will figure out the number of nonzero columns for each row, and their
5374    * offsets
5375    * */
5376   ierr = PetscSFSetGraph(sf,plocalsize,nrows,NULL,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5377   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5378   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5379 
5380   ierr = PetscCalloc1(2*(plocalsize+1),&roffsets);CHKERRQ(ierr);
5381   ierr = PetscCalloc1(2*plocalsize,&nrcols);CHKERRQ(ierr);
5382   ierr = PetscCalloc1(nrows,&pnnz);CHKERRQ(ierr);
5383   roffsets[0] = 0;
5384   roffsets[1] = 0;
5385   for (i=0;i<plocalsize;i++) {
5386     /* diag */
5387     nrcols[i*2+0] = pd->i[i+1] - pd->i[i];
5388     /* off diag */
5389     nrcols[i*2+1] = po->i[i+1] - po->i[i];
5390     /* compute offsets so that we know the relative location of each row */
5391     roffsets[(i+1)*2+0] = roffsets[i*2+0] + nrcols[i*2+0];
5392     roffsets[(i+1)*2+1] = roffsets[i*2+1] + nrcols[i*2+1];
5393   }
5394   ierr = PetscCalloc1(2*nrows,&nlcols);CHKERRQ(ierr);
5395   ierr = PetscCalloc1(2*nrows,&loffsets);CHKERRQ(ierr);
5396   /* 'r' means root, and 'l' means leaf */
5397   ierr = PetscSFBcastBegin(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5398   ierr = PetscSFBcastBegin(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5399   ierr = PetscSFBcastEnd(sf,MPIU_2INT,nrcols,nlcols);CHKERRQ(ierr);
5400   ierr = PetscSFBcastEnd(sf,MPIU_2INT,roffsets,loffsets);CHKERRQ(ierr);
5401   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5402   ierr = PetscFree(roffsets);CHKERRQ(ierr);
5403   ierr = PetscFree(nrcols);CHKERRQ(ierr);
5404   dntotalcols = 0;
5405   ontotalcols = 0;
5406   ncol = 0;
5407   for (i=0;i<nrows;i++) {
5408     pnnz[i] = nlcols[i*2+0] + nlcols[i*2+1];
5409     ncol = PetscMax(pnnz[i],ncol);
5410     /* diag */
5411     dntotalcols += nlcols[i*2+0];
5412     /* off diag */
5413     ontotalcols += nlcols[i*2+1];
5414   }
5415   /* We do not need to figure out the exact number of columns
5416    * since all the calculations will be done by going through the raw data
5417    * */
5418   ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,nrows,ncol,0,pnnz,P_oth);CHKERRQ(ierr);
5419   ierr = MatSetUp(*P_oth);CHKERRQ(ierr);
5420   ierr = PetscFree(pnnz);CHKERRQ(ierr);
5421   p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5422   /* diag */
5423   ierr = PetscCalloc1(dntotalcols,&iremote);CHKERRQ(ierr);
5424   /* off diag */
5425   ierr = PetscCalloc1(ontotalcols,&oiremote);CHKERRQ(ierr);
5426   /* diag */
5427   ierr = PetscCalloc1(dntotalcols,&ilocal);CHKERRQ(ierr);
5428   /* off diag */
5429   ierr = PetscCalloc1(ontotalcols,&oilocal);CHKERRQ(ierr);
5430   dntotalcols = 0;
5431   ontotalcols = 0;
5432   ntotalcols  = 0;
5433   for (i=0;i<nrows;i++) {
5434     owner = 0;
5435     ierr = PetscLayoutFindOwnerIndex(P->rmap,lrowindices[i],&owner,NULL);CHKERRQ(ierr);
5436     /* Set iremote for diag matrix */
5437     for (j=0;j<nlcols[i*2+0];j++) {
5438       iremote[dntotalcols].index   = loffsets[i*2+0] + j;
5439       iremote[dntotalcols].rank    = owner;
5440       /* P_oth is SeqAIJ, so ilocal needs to point to the first part of memory */
5441       ilocal[dntotalcols++]        = ntotalcols++;
5442     }
5443     /* off diag */
5444     for (j=0;j<nlcols[i*2+1];j++) {
5445       oiremote[ontotalcols].index   = loffsets[i*2+1] + j;
5446       oiremote[ontotalcols].rank    = owner;
5447       oilocal[ontotalcols++]        = ntotalcols++;
5448     }
5449   }
5450   ierr = ISRestoreIndices(rows,&lrowindices);CHKERRQ(ierr);
5451   ierr = PetscFree(loffsets);CHKERRQ(ierr);
5452   ierr = PetscFree(nlcols);CHKERRQ(ierr);
5453   ierr = PetscSFCreate(comm,&sf);CHKERRQ(ierr);
5454   /* P serves as the roots and P_oth as the leaves
5455    * Diag matrix
5456    * */
5457   ierr = PetscSFSetGraph(sf,pd->i[plocalsize],dntotalcols,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5458   ierr = PetscSFSetFromOptions(sf);CHKERRQ(ierr);
5459   ierr = PetscSFSetUp(sf);CHKERRQ(ierr);
5460 
5461   ierr = PetscSFCreate(comm,&osf);CHKERRQ(ierr);
5462   /* Off diag */
5463   ierr = PetscSFSetGraph(osf,po->i[plocalsize],ontotalcols,oilocal,PETSC_OWN_POINTER,oiremote,PETSC_OWN_POINTER);CHKERRQ(ierr);
5464   ierr = PetscSFSetFromOptions(osf);CHKERRQ(ierr);
5465   ierr = PetscSFSetUp(osf);CHKERRQ(ierr);
5466   /* We operate on the matrix internal data for saving memory */
5467   ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5468   ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5469   ierr = MatGetOwnershipRangeColumn(P,&pcstart,NULL);CHKERRQ(ierr);
5470   /* Convert to global indices for diag matrix */
5471   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] += pcstart;
5472   ierr = PetscSFBcastBegin(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5473   /* We want P_oth to store global indices */
5474   ierr = ISLocalToGlobalMappingCreate(comm,1,p->B->cmap->n,p->garray,PETSC_COPY_VALUES,&mapping);CHKERRQ(ierr);
5475   /* Use memory scalable approach */
5476   ierr = ISLocalToGlobalMappingSetType(mapping,ISLOCALTOGLOBALMAPPINGHASH);CHKERRQ(ierr);
5477   ierr = ISLocalToGlobalMappingApply(mapping,po->i[plocalsize],po->j,po->j);CHKERRQ(ierr);
5478   ierr = PetscSFBcastBegin(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5479   ierr = PetscSFBcastEnd(sf,MPIU_INT,pd->j,p_oth->j);CHKERRQ(ierr);
5480   /* Convert back to local indices */
5481   for (i=0;i<pd->i[plocalsize];i++) pd->j[i] -= pcstart;
5482   ierr = PetscSFBcastEnd(osf,MPIU_INT,po->j,p_oth->j);CHKERRQ(ierr);
5483   nout = 0;
5484   ierr = ISGlobalToLocalMappingApply(mapping,IS_GTOLM_DROP,po->i[plocalsize],po->j,&nout,po->j);CHKERRQ(ierr);
5485   if (nout != po->i[plocalsize]) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP,"n %D does not equal to nout %D \n",po->i[plocalsize],nout);
5486   ierr = ISLocalToGlobalMappingDestroy(&mapping);CHKERRQ(ierr);
5487   /* Exchange values */
5488   ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5489   ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5490   /* Stop PETSc from shrinking memory */
5491   for (i=0;i<nrows;i++) p_oth->ilen[i] = p_oth->imax[i];
5492   ierr = MatAssemblyBegin(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5493   ierr = MatAssemblyEnd(*P_oth,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
5494   /* Attach PetscSF objects to P_oth so that we can reuse it later */
5495   ierr = PetscObjectCompose((PetscObject)*P_oth,"diagsf",(PetscObject)sf);CHKERRQ(ierr);
5496   ierr = PetscObjectCompose((PetscObject)*P_oth,"offdiagsf",(PetscObject)osf);CHKERRQ(ierr);
5497   ierr = PetscSFDestroy(&sf);CHKERRQ(ierr);
5498   ierr = PetscSFDestroy(&osf);CHKERRQ(ierr);
5499   PetscFunctionReturn(0);
5500 }
5501 
5502 /*
5503  * Creates a SeqAIJ matrix by taking the rows of P that correspond to the nonzero columns of local A
5504  * This supports MPIAIJ and MAIJ
5505  * */
5506 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A,Mat P,PetscInt dof,MatReuse reuse,Mat *P_oth)
5507 {
5508   Mat_MPIAIJ            *a=(Mat_MPIAIJ*)A->data,*p=(Mat_MPIAIJ*)P->data;
5509   Mat_SeqAIJ            *p_oth;
5510   Mat_SeqAIJ            *pd=(Mat_SeqAIJ*)(p->A)->data,*po=(Mat_SeqAIJ*)(p->B)->data;
5511   IS                    rows,map;
5512   PetscHMapI            hamp;
5513   PetscInt              i,htsize,*rowindices,off,*mapping,key,count;
5514   MPI_Comm              comm;
5515   PetscSF               sf,osf;
5516   PetscBool             has;
5517   PetscErrorCode        ierr;
5518 
5519   PetscFunctionBegin;
5520   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5521   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5522   /* If it is the first time, create an index set of off-diag nonzero columns of A,
5523    *  and then create a submatrix (that often is an overlapping matrix)
5524    * */
5525   if (reuse == MAT_INITIAL_MATRIX) {
5526     /* Use a hash table to figure out unique keys */
5527     ierr = PetscHMapICreate(&hamp);CHKERRQ(ierr);
5528     ierr = PetscHMapIResize(hamp,a->B->cmap->n);CHKERRQ(ierr);
5529     ierr = PetscCalloc1(a->B->cmap->n,&mapping);CHKERRQ(ierr);
5530     count = 0;
5531     /* Assume that a->garray is sorted, otherwise the following does not make sense */
5532     for (i=0;i<a->B->cmap->n;i++) {
5533       key  = a->garray[i]/dof;
5534       ierr = PetscHMapIHas(hamp,key,&has);CHKERRQ(ierr);
5535       if (!has) {
5536         mapping[i] = count;
5537         ierr = PetscHMapISet(hamp,key,count++);CHKERRQ(ierr);
5538       } else {
5539         /* Current 'i' maps to the same key as the previous entry */
5540         mapping[i] = count-1;
5541       }
5542     }
5543     ierr = ISCreateGeneral(comm,a->B->cmap->n,mapping,PETSC_OWN_POINTER,&map);CHKERRQ(ierr);
5544     ierr = PetscHMapIGetSize(hamp,&htsize);CHKERRQ(ierr);
5545     if (htsize!=count) SETERRQ2(comm,PETSC_ERR_ARG_INCOMP," Size of hash map %D is inconsistent with count %D \n",htsize,count);CHKERRQ(ierr);
5546     ierr = PetscCalloc1(htsize,&rowindices);CHKERRQ(ierr);
5547     off = 0;
5548     ierr = PetscHMapIGetKeys(hamp,&off,rowindices);CHKERRQ(ierr);
5549     ierr = PetscHMapIDestroy(&hamp);CHKERRQ(ierr);
5550     ierr = PetscSortInt(htsize,rowindices);CHKERRQ(ierr);
5551     ierr = ISCreateGeneral(comm,htsize,rowindices,PETSC_OWN_POINTER,&rows);CHKERRQ(ierr);
5552     /* In case the matrix was already created but the user wants to recreate it */
5553     ierr = MatDestroy(P_oth);CHKERRQ(ierr);
5554     ierr = MatCreateSeqSubMatrixWithRows_Private(P,rows,P_oth);CHKERRQ(ierr);
5555     ierr = PetscObjectCompose((PetscObject)*P_oth,"aoffdiagtopothmapping",(PetscObject)map);CHKERRQ(ierr);
5556     ierr = ISDestroy(&map);CHKERRQ(ierr);
5557     ierr = ISDestroy(&rows);CHKERRQ(ierr);
5558   } else if (reuse == MAT_REUSE_MATRIX) {
5559     /* If the matrix was already created, we simply update the values using the SF objects
5560      * that were attached to the matrix earlier.
5561      *  */
5562     ierr = PetscObjectQuery((PetscObject)*P_oth,"diagsf",(PetscObject*)&sf);CHKERRQ(ierr);
5563     ierr = PetscObjectQuery((PetscObject)*P_oth,"offdiagsf",(PetscObject*)&osf);CHKERRQ(ierr);
5564     if (!sf || !osf) SETERRQ(comm,PETSC_ERR_ARG_NULL,"Matrix is not initialized yet");
5565     p_oth = (Mat_SeqAIJ*) (*P_oth)->data;
5566     /* Update values in place */
5567     ierr = PetscSFBcastBegin(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5568     ierr = PetscSFBcastBegin(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5569     ierr = PetscSFBcastEnd(sf,MPIU_SCALAR,pd->a,p_oth->a);CHKERRQ(ierr);
5570     ierr = PetscSFBcastEnd(osf,MPIU_SCALAR,po->a,p_oth->a);CHKERRQ(ierr);
5571   } else SETERRQ(comm,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unknown reuse type");
5572   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,P,0,0);CHKERRQ(ierr);
5573   PetscFunctionReturn(0);
5574 }
5575 
5576 /*@C
5577     MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns of local A
5578 
5579     Collective on Mat
5580 
5581    Input Parameters:
5582 +    A,B - the matrices in mpiaij format
5583 .    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5584 -    rowb, colb - index sets of rows and columns of B to extract (or NULL)
5585 
5586    Output Parameters:
5587 +    rowb, colb - index sets of rows and columns of B to extract
5588 -    B_seq - the sequential matrix generated
5589 
5590     Level: developer
5591 
5592 @*/
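/*
   A minimal usage sketch (not part of the PETSc man page; names are illustrative): the index sets created
   on the first call are returned so that a later call with MAT_REUSE_MATRIX can refill B_seq in place.

     IS  rowb = NULL,colb = NULL;
     Mat B_seq;
     ierr = MatGetBrowsOfAcols(A,B,MAT_INITIAL_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     // ... after the values of B change with the same nonzero pattern ...
     ierr = MatGetBrowsOfAcols(A,B,MAT_REUSE_MATRIX,&rowb,&colb,&B_seq);CHKERRQ(ierr);
     ierr = ISDestroy(&rowb);CHKERRQ(ierr);
     ierr = ISDestroy(&colb);CHKERRQ(ierr);
     ierr = MatDestroy(&B_seq);CHKERRQ(ierr);
*/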
5593 PetscErrorCode MatGetBrowsOfAcols(Mat A,Mat B,MatReuse scall,IS *rowb,IS *colb,Mat *B_seq)
5594 {
5595   Mat_MPIAIJ     *a=(Mat_MPIAIJ*)A->data;
5596   PetscErrorCode ierr;
5597   PetscInt       *idx,i,start,ncols,nzA,nzB,*cmap,imark;
5598   IS             isrowb,iscolb;
5599   Mat            *bseq=NULL;
5600 
5601   PetscFunctionBegin;
5602   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5603     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5604   }
5605   ierr = PetscLogEventBegin(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5606 
5607   if (scall == MAT_INITIAL_MATRIX) {
5608     start = A->cmap->rstart;
5609     cmap  = a->garray;
5610     nzA   = a->A->cmap->n;
5611     nzB   = a->B->cmap->n;
5612     ierr  = PetscMalloc1(nzA+nzB, &idx);CHKERRQ(ierr);
5613     ncols = 0;
5614     for (i=0; i<nzB; i++) {  /* row < local row index */
5615       if (cmap[i] < start) idx[ncols++] = cmap[i];
5616       else break;
5617     }
5618     imark = i;
5619     for (i=0; i<nzA; i++) idx[ncols++] = start + i;  /* local rows */
5620     for (i=imark; i<nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
5621     ierr = ISCreateGeneral(PETSC_COMM_SELF,ncols,idx,PETSC_OWN_POINTER,&isrowb);CHKERRQ(ierr);
5622     ierr = ISCreateStride(PETSC_COMM_SELF,B->cmap->N,0,1,&iscolb);CHKERRQ(ierr);
5623   } else {
5624     if (!rowb || !colb) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"IS rowb and colb must be provided for MAT_REUSE_MATRIX");
5625     isrowb  = *rowb; iscolb = *colb;
5626     ierr    = PetscMalloc1(1,&bseq);CHKERRQ(ierr);
5627     bseq[0] = *B_seq;
5628   }
5629   ierr   = MatCreateSubMatrices(B,1,&isrowb,&iscolb,scall,&bseq);CHKERRQ(ierr);
5630   *B_seq = bseq[0];
5631   ierr   = PetscFree(bseq);CHKERRQ(ierr);
5632   if (!rowb) {
5633     ierr = ISDestroy(&isrowb);CHKERRQ(ierr);
5634   } else {
5635     *rowb = isrowb;
5636   }
5637   if (!colb) {
5638     ierr = ISDestroy(&iscolb);CHKERRQ(ierr);
5639   } else {
5640     *colb = iscolb;
5641   }
5642   ierr = PetscLogEventEnd(MAT_GetBrowsOfAcols,A,B,0,0);CHKERRQ(ierr);
5643   PetscFunctionReturn(0);
5644 }
5645 
5646 /*
5647     MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking the rows of B that correspond to the nonzero columns
5648     of the OFF-DIAGONAL portion of local A
5649 
5650     Collective on Mat
5651 
5652    Input Parameters:
5653 +    A,B - the matrices in mpiaij format
5654 -    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX
5655 
5656    Output Parameters:
5657 +    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
5658 .    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
5659 .    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
5660 -    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N
5661 
5662     Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
5663      for this matrix. This is not desirable.
5664 
5665     Level: developer
5666 
5667 */
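/*
   A sketch of the intended call pattern for this internal routine (illustrative, not from the PETSc
   documentation): the arrays returned by the MAT_INITIAL_MATRIX call are passed back unchanged for reuse
   and are freed by the caller, matching how they are allocated below (PetscMalloc2/PetscMalloc1).

     PetscInt  *startsj_s = NULL,*startsj_r = NULL;
     MatScalar *bufa = NULL;
     Mat       B_oth;
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_INITIAL_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     // ... after the values of B change with the same nonzero pattern ...
     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,B,MAT_REUSE_MATRIX,&startsj_s,&startsj_r,&bufa,&B_oth);CHKERRQ(ierr);
     ierr = PetscFree2(startsj_s,startsj_r);CHKERRQ(ierr);
     ierr = PetscFree(bufa);CHKERRQ(ierr);
     ierr = MatDestroy(&B_oth);CHKERRQ(ierr);
*/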
5668 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A,Mat B,MatReuse scall,PetscInt **startsj_s,PetscInt **startsj_r,MatScalar **bufa_ptr,Mat *B_oth)
5669 {
5670   PetscErrorCode         ierr;
5671   Mat_MPIAIJ             *a=(Mat_MPIAIJ*)A->data;
5672   Mat_SeqAIJ             *b_oth;
5673   VecScatter             ctx;
5674   MPI_Comm               comm;
5675   const PetscMPIInt      *rprocs,*sprocs;
5676   const PetscInt         *srow,*rstarts,*sstarts;
5677   PetscInt               *rowlen,*bufj,*bufJ,ncols = 0,aBn=a->B->cmap->n,row,*b_othi,*b_othj,*rvalues=NULL,*svalues=NULL,*cols,sbs,rbs;
5678   PetscInt               i,j,k=0,l,ll,nrecvs,nsends,nrows,*rstartsj = NULL,*sstartsj,len;
5679   PetscScalar            *b_otha,*bufa,*bufA,*vals = NULL;
5680   MPI_Request            *rwaits = NULL,*swaits = NULL;
5681   MPI_Status             rstatus;
5682   PetscMPIInt            size,tag,rank,nsends_mpi,nrecvs_mpi;
5683   PETSC_UNUSED PetscMPIInt jj;
5684 
5685   PetscFunctionBegin;
5686   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
5687   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
5688 
5689   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
5690     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%d, %d) != (%d,%d)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
5691   }
5692   ierr = PetscLogEventBegin(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5693   ierr = MPI_Comm_rank(comm,&rank);CHKERRMPI(ierr);
5694 
5695   if (size == 1) {
5696     startsj_s = NULL;
5697     bufa_ptr  = NULL;
5698     *B_oth    = NULL;
5699     PetscFunctionReturn(0);
5700   }
5701 
5702   ctx = a->Mvctx;
5703   tag = ((PetscObject)ctx)->tag;
5704 
5705   ierr = VecScatterGetRemote_Private(ctx,PETSC_TRUE/*send*/,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5706   /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
5707   ierr = VecScatterGetRemoteOrdered_Private(ctx,PETSC_FALSE/*recv*/,&nrecvs,&rstarts,NULL/*indices not needed*/,&rprocs,&rbs);CHKERRQ(ierr);
5708   ierr = PetscMPIIntCast(nsends,&nsends_mpi);CHKERRQ(ierr);
5709   ierr = PetscMPIIntCast(nrecvs,&nrecvs_mpi);CHKERRQ(ierr);
5710   ierr = PetscMalloc2(nrecvs,&rwaits,nsends,&swaits);CHKERRQ(ierr);
5711 
5712   if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
5713   if (scall == MAT_INITIAL_MATRIX) {
5714     /* i-array */
5715     /*---------*/
5716     /*  post receives */
5717     if (nrecvs) {ierr = PetscMalloc1(rbs*(rstarts[nrecvs] - rstarts[0]),&rvalues);CHKERRQ(ierr);} /* rstarts can be NULL when nrecvs=0 */
5718     for (i=0; i<nrecvs; i++) {
5719       rowlen = rvalues + rstarts[i]*rbs;
5720       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of indices to be received */
5721       ierr   = MPI_Irecv(rowlen,nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5722     }
5723 
5724     /* pack the outgoing message */
5725     ierr = PetscMalloc2(nsends+1,&sstartsj,nrecvs+1,&rstartsj);CHKERRQ(ierr);
5726 
5727     sstartsj[0] = 0;
5728     rstartsj[0] = 0;
5729     len         = 0; /* total length of j or a array to be sent */
5730     if (nsends) {
5731       k    = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
5732       ierr = PetscMalloc1(sbs*(sstarts[nsends]-sstarts[0]),&svalues);CHKERRQ(ierr);
5733     }
5734     for (i=0; i<nsends; i++) {
5735       rowlen = svalues + (sstarts[i]-sstarts[0])*sbs;
5736       nrows  = sstarts[i+1]-sstarts[i]; /* num of block rows */
5737       for (j=0; j<nrows; j++) {
5738         row = srow[k] + B->rmap->range[rank]; /* global row idx */
5739         for (l=0; l<sbs; l++) {
5740           ierr = MatGetRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr); /* rowlength */
5741 
5742           rowlen[j*sbs+l] = ncols;
5743 
5744           len += ncols;
5745           ierr = MatRestoreRow_MPIAIJ(B,row+l,&ncols,NULL,NULL);CHKERRQ(ierr);
5746         }
5747         k++;
5748       }
5749       ierr = MPI_Isend(rowlen,nrows*sbs,MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5750 
5751       sstartsj[i+1] = len;  /* starting point of (i+1)-th outgoing msg in bufj and bufa */
5752     }
5753     /* recvs and sends of i-array are completed */
5754     i = nrecvs;
5755     while (i--) {
5756       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr);
5757     }
5758     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5759     ierr = PetscFree(svalues);CHKERRQ(ierr);
5760 
5761     /* allocate buffers for sending j and a arrays */
5762     ierr = PetscMalloc1(len+1,&bufj);CHKERRQ(ierr);
5763     ierr = PetscMalloc1(len+1,&bufa);CHKERRQ(ierr);
5764 
5765     /* create i-array of B_oth */
5766     ierr = PetscMalloc1(aBn+2,&b_othi);CHKERRQ(ierr);
5767 
5768     b_othi[0] = 0;
5769     len       = 0; /* total length of j or a array to be received */
5770     k         = 0;
5771     for (i=0; i<nrecvs; i++) {
5772       rowlen = rvalues + (rstarts[i]-rstarts[0])*rbs;
5773       nrows  = (rstarts[i+1]-rstarts[i])*rbs; /* num of rows to be received */
5774       for (j=0; j<nrows; j++) {
5775         b_othi[k+1] = b_othi[k] + rowlen[j];
5776         ierr = PetscIntSumError(rowlen[j],len,&len);CHKERRQ(ierr);
5777         k++;
5778       }
5779       rstartsj[i+1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
5780     }
5781     ierr = PetscFree(rvalues);CHKERRQ(ierr);
5782 
5783     /* allocate space for j and a arrays of B_oth */
5784     ierr = PetscMalloc1(b_othi[aBn]+1,&b_othj);CHKERRQ(ierr);
5785     ierr = PetscMalloc1(b_othi[aBn]+1,&b_otha);CHKERRQ(ierr);
5786 
5787     /* j-array */
5788     /*---------*/
5789     /*  post receives of j-array */
5790     for (i=0; i<nrecvs; i++) {
5791       nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5792       ierr  = MPI_Irecv(b_othj+rstartsj[i],nrows,MPIU_INT,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5793     }
5794 
5795     /* pack the outgoing message j-array */
5796     if (nsends) k = sstarts[0];
5797     for (i=0; i<nsends; i++) {
5798       nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5799       bufJ  = bufj+sstartsj[i];
5800       for (j=0; j<nrows; j++) {
5801         row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5802         for (ll=0; ll<sbs; ll++) {
5803           ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5804           for (l=0; l<ncols; l++) {
5805             *bufJ++ = cols[l];
5806           }
5807           ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,&cols,NULL);CHKERRQ(ierr);
5808         }
5809       }
5810       ierr = MPI_Isend(bufj+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_INT,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5811     }
5812 
5813     /* recvs and sends of j-array are completed */
5814     i = nrecvs;
5815     while (i--) {
5816       ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr);
5817     }
5818     if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5819   } else if (scall == MAT_REUSE_MATRIX) {
5820     sstartsj = *startsj_s;
5821     rstartsj = *startsj_r;
5822     bufa     = *bufa_ptr;
5823     b_oth    = (Mat_SeqAIJ*)(*B_oth)->data;
5824     b_otha   = b_oth->a;
5825 #if defined(PETSC_HAVE_DEVICE)
5826     (*B_oth)->offloadmask = PETSC_OFFLOAD_CPU;
5827 #endif
5828   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container");
5829 
5830   /* a-array */
5831   /*---------*/
5832   /*  post receives of a-array */
5833   for (i=0; i<nrecvs; i++) {
5834     nrows = rstartsj[i+1]-rstartsj[i]; /* length of the msg received */
5835     ierr  = MPI_Irecv(b_otha+rstartsj[i],nrows,MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);CHKERRQ(ierr);
5836   }
5837 
5838   /* pack the outgoing message a-array */
5839   if (nsends) k = sstarts[0];
5840   for (i=0; i<nsends; i++) {
5841     nrows = sstarts[i+1]-sstarts[i]; /* num of block rows */
5842     bufA  = bufa+sstartsj[i];
5843     for (j=0; j<nrows; j++) {
5844       row = srow[k++] + B->rmap->range[rank];  /* global row idx */
5845       for (ll=0; ll<sbs; ll++) {
5846         ierr = MatGetRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5847         for (l=0; l<ncols; l++) {
5848           *bufA++ = vals[l];
5849         }
5850         ierr = MatRestoreRow_MPIAIJ(B,row+ll,&ncols,NULL,&vals);CHKERRQ(ierr);
5851       }
5852     }
5853     ierr = MPI_Isend(bufa+sstartsj[i],sstartsj[i+1]-sstartsj[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);CHKERRMPI(ierr);
5854   }
5855   /* recvs and sends of a-array are completed */
5856   i = nrecvs;
5857   while (i--) {
5858     ierr = MPI_Waitany(nrecvs_mpi,rwaits,&jj,&rstatus);CHKERRMPI(ierr);
5859   }
5860   if (nsends) {ierr = MPI_Waitall(nsends_mpi,swaits,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);}
5861   ierr = PetscFree2(rwaits,swaits);CHKERRQ(ierr);
5862 
5863   if (scall == MAT_INITIAL_MATRIX) {
5864     /* put together the new matrix */
5865     ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,aBn,B->cmap->N,b_othi,b_othj,b_otha,B_oth);CHKERRQ(ierr);
5866 
5867     /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
5868     /* Since these are PETSc arrays, change flags to free them as necessary. */
5869     b_oth          = (Mat_SeqAIJ*)(*B_oth)->data;
5870     b_oth->free_a  = PETSC_TRUE;
5871     b_oth->free_ij = PETSC_TRUE;
5872     b_oth->nonew   = 0;
5873 
5874     ierr = PetscFree(bufj);CHKERRQ(ierr);
5875     if (!startsj_s || !bufa_ptr) {
5876       ierr = PetscFree2(sstartsj,rstartsj);CHKERRQ(ierr);
5877       ierr = PetscFree(bufa);CHKERRQ(ierr);
5878     } else {
5879       *startsj_s = sstartsj;
5880       *startsj_r = rstartsj;
5881       *bufa_ptr  = bufa;
5882     }
5883   }
5884 
5885   ierr = VecScatterRestoreRemote_Private(ctx,PETSC_TRUE,&nsends,&sstarts,&srow,&sprocs,&sbs);CHKERRQ(ierr);
5886   ierr = VecScatterRestoreRemoteOrdered_Private(ctx,PETSC_FALSE,&nrecvs,&rstarts,NULL,&rprocs,&rbs);CHKERRQ(ierr);
5887   ierr = PetscLogEventEnd(MAT_GetBrowsOfAocols,A,B,0,0);CHKERRQ(ierr);
5888   PetscFunctionReturn(0);
5889 }
5890 
5891 /*@C
5892   MatGetCommunicationStructs - Provides access to the communication structures used in matrix-vector multiplication.
5893 
5894   Not Collective
5895 
5896   Input Parameters:
5897 . A - The matrix in mpiaij format
5898 
5899   Output Parameters:
5900 + lvec - The local vector holding off-process values from the argument to a matrix-vector product
5901 . colmap - A map from global column index to local index into lvec
5902 - multScatter - A scatter from the argument of a matrix-vector product to lvec
5903 
5904   Level: developer
5905 
5906 @*/
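/*
   A minimal usage sketch (not part of the PETSc man page; names are illustrative).  Note that the type of
   the colmap argument depends on whether PETSc was configured with PETSC_USE_CTABLE.

     Vec        lvec;
     VecScatter Mvctx;
   #if defined(PETSC_USE_CTABLE)
     PetscTable colmap;
   #else
     PetscInt   *colmap;
   #endif
     ierr = MatGetCommunicationStructs(A,&lvec,&colmap,&Mvctx);CHKERRQ(ierr);
*/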
5907 #if defined(PETSC_USE_CTABLE)
5908 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscTable *colmap, VecScatter *multScatter)
5909 #else
5910 PetscErrorCode MatGetCommunicationStructs(Mat A, Vec *lvec, PetscInt *colmap[], VecScatter *multScatter)
5911 #endif
5912 {
5913   Mat_MPIAIJ *a;
5914 
5915   PetscFunctionBegin;
5916   PetscValidHeaderSpecific(A, MAT_CLASSID, 1);
5917   PetscValidPointer(lvec, 2);
5918   PetscValidPointer(colmap, 3);
5919   PetscValidPointer(multScatter, 4);
5920   a = (Mat_MPIAIJ*) A->data;
5921   if (lvec) *lvec = a->lvec;
5922   if (colmap) *colmap = a->colmap;
5923   if (multScatter) *multScatter = a->Mvctx;
5924   PetscFunctionReturn(0);
5925 }
5926 
5927 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat,MatType,MatReuse,Mat*);
5928 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat,MatType,MatReuse,Mat*);
5929 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat,MatType,MatReuse,Mat*);
5930 #if defined(PETSC_HAVE_MKL_SPARSE)
5931 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat,MatType,MatReuse,Mat*);
5932 #endif
5933 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat,MatType,MatReuse,Mat*);
5934 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat,MatType,MatReuse,Mat*);
5935 #if defined(PETSC_HAVE_ELEMENTAL)
5936 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat,MatType,MatReuse,Mat*);
5937 #endif
5938 #if defined(PETSC_HAVE_SCALAPACK)
5939 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat,MatType,MatReuse,Mat*);
5940 #endif
5941 #if defined(PETSC_HAVE_HYPRE)
5942 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
5943 #endif
5944 #if defined(PETSC_HAVE_CUDA)
5945 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat,MatType,MatReuse,Mat*);
5946 #endif
5947 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
5948 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat,MatType,MatReuse,Mat*);
5949 #endif
5950 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat,MatType,MatReuse,Mat*);
5951 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
5952 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);
5953 
5954 /*
5955     Computes (B'*A')' since computing B*A directly is untenable
5956 
5957                n                       p                          p
5958         [             ]       [             ]         [                 ]
5959       m [      A      ]  *  n [       B     ]   =   m [         C       ]
5960         [             ]       [             ]         [                 ]
5961 
5962 */
5963 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A,Mat B,Mat C)
5964 {
5965   PetscErrorCode ierr;
5966   Mat            At,Bt,Ct;
5967 
5968   PetscFunctionBegin;
5969   ierr = MatTranspose(A,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr);
5970   ierr = MatTranspose(B,MAT_INITIAL_MATRIX,&Bt);CHKERRQ(ierr);
5971   ierr = MatMatMult(Bt,At,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&Ct);CHKERRQ(ierr);
5972   ierr = MatDestroy(&At);CHKERRQ(ierr);
5973   ierr = MatDestroy(&Bt);CHKERRQ(ierr);
5974   ierr = MatTranspose(Ct,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
5975   ierr = MatDestroy(&Ct);CHKERRQ(ierr);
5976   PetscFunctionReturn(0);
5977 }
5978 
5979 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A,Mat B,PetscReal fill,Mat C)
5980 {
5981   PetscErrorCode ierr;
5982   PetscBool      cisdense;
5983 
5984   PetscFunctionBegin;
5985   if (A->cmap->n != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"A->cmap->n %d != B->rmap->n %d\n",A->cmap->n,B->rmap->n);
5986   ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,A->rmap->N,B->cmap->N);CHKERRQ(ierr);
5987   ierr = MatSetBlockSizesFromMats(C,A,B);CHKERRQ(ierr);
5988   ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
5989   if (!cisdense) {
5990     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
5991   }
5992   ierr = MatSetUp(C);CHKERRQ(ierr);
5993 
5994   C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
5995   PetscFunctionReturn(0);
5996 }
5997 
5998 /* ----------------------------------------------------------------*/
5999 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
6000 {
6001   Mat_Product *product = C->product;
6002   Mat         A = product->A,B=product->B;
6003 
6004   PetscFunctionBegin;
6005   if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
6006     SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend);
6007 
6008   C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
6009   C->ops->productsymbolic = MatProductSymbolic_AB;
6010   PetscFunctionReturn(0);
6011 }
6012 
6013 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
6014 {
6015   PetscErrorCode ierr;
6016   Mat_Product    *product = C->product;
6017 
6018   PetscFunctionBegin;
6019   if (product->type == MATPRODUCT_AB) {
6020     ierr = MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C);CHKERRQ(ierr);
6021   }
6022   PetscFunctionReturn(0);
6023 }
6024 /* ----------------------------------------------------------------*/
6025 
6026 /*MC
6027    MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.
6028 
6029    Options Database Keys:
6030 . -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()
6031 
6032    Level: beginner
6033 
6034    Notes:
6035     MatSetValues() may be called for this matrix type with a NULL argument for the numerical values;
6036     in this case the values associated with the rows and columns one passes in are set to zero
6037     in the matrix
6038 
6039     MatSetOption(A,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this case no
6040     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
6041 
6042 .seealso: MatCreateAIJ()
6043 M*/
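/*
   A minimal sketch of selecting this type (illustrative, not from the PETSc man page): create the matrix,
   set its sizes, choose the type (or let -mat_type mpiaij do it through MatSetFromOptions()), and
   preallocate.

     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
     ierr = MatMPIAIJSetPreallocation(A,5,NULL,2,NULL);CHKERRQ(ierr);
*/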
6044 
6045 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
6046 {
6047   Mat_MPIAIJ     *b;
6048   PetscErrorCode ierr;
6049   PetscMPIInt    size;
6050 
6051   PetscFunctionBegin;
6052   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
6053 
6054   ierr          = PetscNewLog(B,&b);CHKERRQ(ierr);
6055   B->data       = (void*)b;
6056   ierr          = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
6057   B->assembled  = PETSC_FALSE;
6058   B->insertmode = NOT_SET_VALUES;
6059   b->size       = size;
6060 
6061   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRMPI(ierr);
6062 
6063   /* build cache for off array entries formed */
6064   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
6065 
6066   b->donotstash  = PETSC_FALSE;
6067   b->colmap      = NULL;
6068   b->garray      = NULL;
6069   b->roworiented = PETSC_TRUE;
6070 
6071   /* stuff used for matrix vector multiply */
6072   b->lvec  = NULL;
6073   b->Mvctx = NULL;
6074 
6075   /* stuff for MatGetRow() */
6076   b->rowindices   = NULL;
6077   b->rowvalues    = NULL;
6078   b->getrowactive = PETSC_FALSE;
6079 
6080   /* flexible pointer used in CUSPARSE classes */
6081   b->spptr = NULL;
6082 
6083   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetUseScalableIncreaseOverlap_C",MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ);CHKERRQ(ierr);
6084   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPIAIJ);CHKERRQ(ierr);
6085   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPIAIJ);CHKERRQ(ierr);
6086   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_MPIAIJ);CHKERRQ(ierr);
6087   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocation_C",MatMPIAIJSetPreallocation_MPIAIJ);CHKERRQ(ierr);
6088   ierr = PetscObjectComposeFunction((PetscObject)B,"MatResetPreallocation_C",MatResetPreallocation_MPIAIJ);CHKERRQ(ierr);
6089   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPIAIJSetPreallocationCSR_C",MatMPIAIJSetPreallocationCSR_MPIAIJ);CHKERRQ(ierr);
6090   ierr = PetscObjectComposeFunction((PetscObject)B,"MatDiagonalScaleLocal_C",MatDiagonalScaleLocal_MPIAIJ);CHKERRQ(ierr);
6091   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijperm_C",MatConvert_MPIAIJ_MPIAIJPERM);CHKERRQ(ierr);
6092   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijsell_C",MatConvert_MPIAIJ_MPIAIJSELL);CHKERRQ(ierr);
6093 #if defined(PETSC_HAVE_CUDA)
6094   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcusparse_C",MatConvert_MPIAIJ_MPIAIJCUSPARSE);CHKERRQ(ierr);
6095 #endif
6096 #if defined(PETSC_HAVE_KOKKOS_KERNELS)
6097   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijkokkos_C",MatConvert_MPIAIJ_MPIAIJKokkos);CHKERRQ(ierr);
6098 #endif
6099 #if defined(PETSC_HAVE_MKL_SPARSE)
6100   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijmkl_C",MatConvert_MPIAIJ_MPIAIJMKL);CHKERRQ(ierr);
6101 #endif
6102   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpiaijcrl_C",MatConvert_MPIAIJ_MPIAIJCRL);CHKERRQ(ierr);
6103   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpibaij_C",MatConvert_MPIAIJ_MPIBAIJ);CHKERRQ(ierr);
6104   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisbaij_C",MatConvert_MPIAIJ_MPISBAIJ);CHKERRQ(ierr);
6105 #if defined(PETSC_HAVE_ELEMENTAL)
6106   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_elemental_C",MatConvert_MPIAIJ_Elemental);CHKERRQ(ierr);
6107 #endif
6108 #if defined(PETSC_HAVE_SCALAPACK)
6109   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_scalapack_C",MatConvert_AIJ_ScaLAPACK);CHKERRQ(ierr);
6110 #endif
6111   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
6112   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_mpisell_C",MatConvert_MPIAIJ_MPISELL);CHKERRQ(ierr);
6113 #if defined(PETSC_HAVE_HYPRE)
6114   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
6115   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_transpose_mpiaij_mpiaij_C",MatProductSetFromOptions_Transpose_AIJ_AIJ);CHKERRQ(ierr);
6116 #endif
6117   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_is_mpiaij_C",MatProductSetFromOptions_IS_XAIJ);CHKERRQ(ierr);
6118   ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpiaij_C",MatProductSetFromOptions_MPIAIJ);CHKERRQ(ierr);
6119   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIAIJ);CHKERRQ(ierr);
6120   PetscFunctionReturn(0);
6121 }
6122 
6123 /*@C
6124      MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
6125          and "off-diagonal" part of the matrix in CSR format.
6126 
6127    Collective
6128 
6129    Input Parameters:
6130 +  comm - MPI communicator
6131 .  m - number of local rows (Cannot be PETSC_DECIDE)
6132 .  n - This value should be the same as the local size used in creating the
6133        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have it
6134        calculated if N is given) For square matrices n is almost always m.
6135 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
6136 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
6137 .   i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
6138 .   j - column indices
6139 .   a - matrix values
6140 .   oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
6141 .   oj - column indices
6142 -   oa - matrix values
6143 
6144    Output Parameter:
6145 .   mat - the matrix
6146 
6147    Level: advanced
6148 
6149    Notes:
6150        The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
6151        must free the arrays once the matrix has been destroyed and not before.
6152 
6153        The i and j indices are 0 based
6154 
6155        See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix
6156 
6157        This sets local rows and cannot be used to set off-processor values.
6158 
6159        Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
6160        legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
6161        not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
6162        the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
6163        keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
6164        communication if it is known that only local entries will be set.
6165 
6166 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
6167           MATMPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithArrays()
6168 @*/
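/*
   A sketch of the split-CSR input on exactly two ranks (values illustrative, not from the PETSc man page):
   each rank owns one row of the 2x2 matrix [2 -1; -1 2].  Judging from the implementation below, the j
   indices of the "diagonal" part are local to the owned column block while the oj indices are global, and
   all arrays must stay allocated for the lifetime of the matrix.

     PetscMPIInt rank;
     PetscInt    i[]  = {0,1},j[] = {0},oi[] = {0,1},oj[1];
     PetscScalar a[]  = {2.0},oa[] = {-1.0};
     Mat         A;
     ierr  = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRMPI(ierr);
     oj[0] = rank ? 0 : 1;   // the off-diagonal entry lives in the other rank's column
     ierr  = MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD,1,1,2,2,i,j,a,oi,oj,oa,&A);CHKERRQ(ierr);
*/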
6169 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt i[],PetscInt j[],PetscScalar a[],PetscInt oi[], PetscInt oj[],PetscScalar oa[],Mat *mat)
6170 {
6171   PetscErrorCode ierr;
6172   Mat_MPIAIJ     *maij;
6173 
6174   PetscFunctionBegin;
6175   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
6176   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
6177   if (oi[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"oi (row indices) must start with 0");
6178   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
6179   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
6180   ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr);
6181   maij = (Mat_MPIAIJ*) (*mat)->data;
6182 
6183   (*mat)->preallocated = PETSC_TRUE;
6184 
6185   ierr = PetscLayoutSetUp((*mat)->rmap);CHKERRQ(ierr);
6186   ierr = PetscLayoutSetUp((*mat)->cmap);CHKERRQ(ierr);
6187 
6188   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,n,i,j,a,&maij->A);CHKERRQ(ierr);
6189   ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,m,(*mat)->cmap->N,oi,oj,oa,&maij->B);CHKERRQ(ierr);
6190 
6191   ierr = MatAssemblyBegin(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6192   ierr = MatAssemblyEnd(maij->A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6193   ierr = MatAssemblyBegin(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6194   ierr = MatAssemblyEnd(maij->B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6195 
6196   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr);
6197   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6198   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
6199   ierr = MatSetOption(*mat,MAT_NO_OFF_PROC_ENTRIES,PETSC_FALSE);CHKERRQ(ierr);
6200   ierr = MatSetOption(*mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
6201   PetscFunctionReturn(0);
6202 }
6203 
6204 /*
6205     Special version for direct calls from Fortran
6206 */
6207 #include <petsc/private/fortranimpl.h>
6208 
6209 /* Change these macros so they can be used in a void function */
6210 #undef CHKERRQ
6211 #define CHKERRQ(ierr) CHKERRABORT(PETSC_COMM_WORLD,ierr)
6212 #undef SETERRQ2
6213 #define SETERRQ2(comm,ierr,b,c,d) CHKERRABORT(comm,ierr)
6214 #undef SETERRQ3
6215 #define SETERRQ3(comm,ierr,b,c,d,e) CHKERRABORT(comm,ierr)
6216 #undef SETERRQ
6217 #define SETERRQ(c,ierr,b) CHKERRABORT(c,ierr)
6218 
6219 #if defined(PETSC_HAVE_FORTRAN_CAPS)
6220 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
6221 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
6222 #define matsetvaluesmpiaij_ matsetvaluesmpiaij
6223 #else
6224 #endif
6225 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat,PetscInt *mm,const PetscInt im[],PetscInt *mn,const PetscInt in[],const PetscScalar v[],InsertMode *maddv,PetscErrorCode *_ierr)
6226 {
6227   Mat            mat  = *mmat;
6228   PetscInt       m    = *mm, n = *mn;
6229   InsertMode     addv = *maddv;
6230   Mat_MPIAIJ     *aij = (Mat_MPIAIJ*)mat->data;
6231   PetscScalar    value;
6232   PetscErrorCode ierr;
6233 
6234   MatCheckPreallocated(mat,1);
6235   if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
6236   else if (mat->insertmode != addv) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Cannot mix add values and insert values");
6237   {
6238     PetscInt  i,j,rstart  = mat->rmap->rstart,rend = mat->rmap->rend;
6239     PetscInt  cstart      = mat->cmap->rstart,cend = mat->cmap->rend,row,col;
6240     PetscBool roworiented = aij->roworiented;
6241 
6242     /* Some variables required in the macros */
6243     Mat        A                    = aij->A;
6244     Mat_SeqAIJ *a                   = (Mat_SeqAIJ*)A->data;
6245     PetscInt   *aimax               = a->imax,*ai = a->i,*ailen = a->ilen,*aj = a->j;
6246     MatScalar  *aa                  = a->a;
6247     PetscBool  ignorezeroentries    = (((a->ignorezeroentries)&&(addv==ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
6248     Mat        B                    = aij->B;
6249     Mat_SeqAIJ *b                   = (Mat_SeqAIJ*)B->data;
6250     PetscInt   *bimax               = b->imax,*bi = b->i,*bilen = b->ilen,*bj = b->j,bm = aij->B->rmap->n,am = aij->A->rmap->n;
6251     MatScalar  *ba                  = b->a;
6252     /* The variable below is only needed for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
6253      * cannot use "#if defined" inside a macro. */
6254     PETSC_UNUSED PetscBool inserted = PETSC_FALSE;
6255 
6256     PetscInt  *rp1,*rp2,ii,nrow1,nrow2,_i,rmax1,rmax2,N,low1,high1,low2,high2,t,lastcol1,lastcol2;
6257     PetscInt  nonew = a->nonew;
6258     MatScalar *ap1,*ap2;
6259 
6260     PetscFunctionBegin;
6261     for (i=0; i<m; i++) {
6262       if (im[i] < 0) continue;
6263       if (PetscUnlikelyDebug(im[i] >= mat->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
6264       if (im[i] >= rstart && im[i] < rend) {
6265         row      = im[i] - rstart;
6266         lastcol1 = -1;
6267         rp1      = aj + ai[row];
6268         ap1      = aa + ai[row];
6269         rmax1    = aimax[row];
6270         nrow1    = ailen[row];
6271         low1     = 0;
6272         high1    = nrow1;
6273         lastcol2 = -1;
6274         rp2      = bj + bi[row];
6275         ap2      = ba + bi[row];
6276         rmax2    = bimax[row];
6277         nrow2    = bilen[row];
6278         low2     = 0;
6279         high2    = nrow2;
6280 
6281         for (j=0; j<n; j++) {
6282           if (roworiented) value = v[i*n+j];
6283           else value = v[i+j*m];
6284           if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
6285           if (in[j] >= cstart && in[j] < cend) {
6286             col = in[j] - cstart;
6287             MatSetValues_SeqAIJ_A_Private(row,col,value,addv,im[i],in[j]);
6288 #if defined(PETSC_HAVE_DEVICE)
6289             if (A->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) A->offloadmask = PETSC_OFFLOAD_CPU;
6290 #endif
6291           } else if (in[j] < 0) continue;
6292           else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
6293             /* the extra braces around SETERRQ2() are required for --with-errorchecking=0, due to the next 'else' clause */
6294             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
6295           } else {
6296             if (mat->was_assembled) {
6297               if (!aij->colmap) {
6298                 ierr = MatCreateColmap_MPIAIJ_Private(mat);CHKERRQ(ierr);
6299               }
6300 #if defined(PETSC_USE_CTABLE)
6301               ierr = PetscTableFind(aij->colmap,in[j]+1,&col);CHKERRQ(ierr);
6302               col--;
6303 #else
6304               col = aij->colmap[in[j]] - 1;
6305 #endif
6306               if (col < 0 && !((Mat_SeqAIJ*)(aij->A->data))->nonew) {
6307                 ierr = MatDisAssemble_MPIAIJ(mat);CHKERRQ(ierr);
6308                 col  =  in[j];
6309                 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
6310                 B        = aij->B;
6311                 b        = (Mat_SeqAIJ*)B->data;
6312                 bimax    = b->imax; bi = b->i; bilen = b->ilen; bj = b->j;
6313                 rp2      = bj + bi[row];
6314                 ap2      = ba + bi[row];
6315                 rmax2    = bimax[row];
6316                 nrow2    = bilen[row];
6317                 low2     = 0;
6318                 high2    = nrow2;
6319                 bm       = aij->B->rmap->n;
6320                 ba       = b->a;
6321                 inserted = PETSC_FALSE;
6322               }
6323             } else col = in[j];
6324             MatSetValues_SeqAIJ_B_Private(row,col,value,addv,im[i],in[j]);
6325 #if defined(PETSC_HAVE_DEVICE)
6326             if (B->offloadmask != PETSC_OFFLOAD_UNALLOCATED && inserted) B->offloadmask = PETSC_OFFLOAD_CPU;
6327 #endif
6328           }
6329         }
6330       } else if (!aij->donotstash) {
6331         if (roworiented) {
6332           ierr = MatStashValuesRow_Private(&mat->stash,im[i],n,in,v+i*n,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6333         } else {
6334           ierr = MatStashValuesCol_Private(&mat->stash,im[i],n,in,v+i,m,(PetscBool)(ignorezeroentries && (addv == ADD_VALUES)));CHKERRQ(ierr);
6335         }
6336       }
6337     }
6338   }
6339   PetscFunctionReturnVoid();
6340 }
6341 
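/* Scratch data for the MPIAIJ matrix-matrix product backend below: the symbolic phase
   (MatProductSymbolic_MPIAIJBACKEND()) creates the intermediate sequential products mp[] and the
   COO coordinates of C, while the numeric phase (MatProductNumeric_MPIAIJBACKEND()) reruns the
   numeric ops of mp[], gathers their values into coo_v (and coo_w for off-process entries), and
   inserts them into C with MatSetValuesCOO() */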
6342 typedef struct {
6343   Mat       *mp;    /* intermediate products */
6344   PetscBool *mptmp; /* is the intermediate product temporary ? */
6345   PetscInt  cp;     /* number of intermediate products */
6346 
6347   /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
6348   PetscInt    *startsj_s,*startsj_r;
6349   PetscScalar *bufa;
6350   Mat         P_oth;
6351 
6352   /* may take advantage of merging product->B */
6353   Mat Bloc;
6354 
6355   /* cusparse does not support splitting the symbolic and numeric phases;
6356      when api_user is true, we do not need to update the numerical values
6357      of the temporary storage */
6358   PetscBool reusesym;
6359 
6360   /* support for COO values insertion */
6361   PetscScalar  *coo_v,*coo_w;
6362   PetscInt     **own;
6363   PetscInt     **off;
6364   PetscBool    hasoffproc; /* if true, values are inserted non-locally (i.e. for AtB or PtAP) */
6365   PetscSF      sf; /* used for non-local values insertion and memory allocation */
6366   PetscMemType mtype;
6367 
6368   /* customization */
6369   PetscBool abmerge;
6370   PetscBool P_oth_bind;
6371 } MatMatMPIAIJBACKEND;
6372 
6373 PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
6374 {
6375   MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND*)data;
6376   PetscInt            i;
6377   PetscErrorCode      ierr;
6378 
6379   PetscFunctionBegin;
6380   ierr = PetscFree2(mmdata->startsj_s,mmdata->startsj_r);CHKERRQ(ierr);
6381   ierr = PetscFree(mmdata->bufa);CHKERRQ(ierr);
6382   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_v);CHKERRQ(ierr);
6383   ierr = PetscSFFree(mmdata->sf,mmdata->mtype,mmdata->coo_w);CHKERRQ(ierr);
6384   ierr = MatDestroy(&mmdata->P_oth);CHKERRQ(ierr);
6385   ierr = MatDestroy(&mmdata->Bloc);CHKERRQ(ierr);
6386   ierr = PetscSFDestroy(&mmdata->sf);CHKERRQ(ierr);
6387   for (i = 0; i < mmdata->cp; i++) {
6388     ierr = MatDestroy(&mmdata->mp[i]);CHKERRQ(ierr);
6389   }
6390   ierr = PetscFree(mmdata->mp);CHKERRQ(ierr);
6391   ierr = PetscFree(mmdata->mptmp);CHKERRQ(ierr);
6392   ierr = PetscFree(mmdata->own[0]);CHKERRQ(ierr);
6393   ierr = PetscFree(mmdata->own);CHKERRQ(ierr);
6394   ierr = PetscFree(mmdata->off[0]);CHKERRQ(ierr);
6395   ierr = PetscFree(mmdata->off);CHKERRQ(ierr);
6396   ierr = PetscFree(mmdata);CHKERRQ(ierr);
6397   PetscFunctionReturn(0);
6398 }
6399 
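/* Copy the values stored at positions idx[] of the value array of the SeqAIJ matrix A into v[]
   (or the first n values when idx is NULL), dispatching to a type-specific implementation when
   one is composed as "MatSeqAIJCopySubArray_C" */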
6400 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
6401 {
6402   PetscErrorCode (*f)(Mat,PetscInt,const PetscInt[],PetscScalar[]);
6403   PetscErrorCode ierr;
6404 
6405   PetscFunctionBegin;
6406   ierr = PetscObjectQueryFunction((PetscObject)A,"MatSeqAIJCopySubArray_C",&f);CHKERRQ(ierr);
6407   if (f) {
6408     ierr = (*f)(A,n,idx,v);CHKERRQ(ierr);
6409   } else {
6410     const PetscScalar *vv;
6411 
6412     ierr = MatSeqAIJGetArrayRead(A,&vv);CHKERRQ(ierr);
6413     if (n && idx) {
6414       PetscScalar    *w = v;
6415       const PetscInt *oi = idx;
6416       PetscInt       j;
6417 
6418       for (j = 0; j < n; j++) *w++ = vv[*oi++];
6419     } else {
6420       ierr = PetscArraycpy(v,vv,n);CHKERRQ(ierr);
6421     }
6422     ierr = MatSeqAIJRestoreArrayRead(A,&vv);CHKERRQ(ierr);
6423   }
6424   PetscFunctionReturn(0);
6425 }
6426 
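/* Numeric phase of the backend product: update the temporary matrices P_oth and Bloc if needed,
   run the numeric ops of the intermediate products, copy their values into the COO work arrays
   (coo_w collects off-process entries that are gathered onto coo_v through the SF), and insert
   everything into C with MatSetValuesCOO() */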
6427 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
6428 {
6429   MatMatMPIAIJBACKEND *mmdata;
6430   PetscInt            i,n_d,n_o;
6431   PetscErrorCode      ierr;
6432 
6433   PetscFunctionBegin;
6434   MatCheckProduct(C,1);
6435   if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
6436   mmdata = (MatMatMPIAIJBACKEND*)C->product->data;
6437   if (!mmdata->reusesym) { /* update temporary matrices */
6438     if (mmdata->P_oth) {
6439       ierr = MatGetBrowsOfAoCols_MPIAIJ(C->product->A,C->product->B,MAT_REUSE_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6440     }
6441     if (mmdata->Bloc) {
6442       ierr = MatMPIAIJGetLocalMatMerge(C->product->B,MAT_REUSE_MATRIX,NULL,&mmdata->Bloc);CHKERRQ(ierr);
6443     }
6444   }
6445   mmdata->reusesym = PETSC_FALSE;
6446 
6447   for (i = 0; i < mmdata->cp; i++) {
6448     if (!mmdata->mp[i]->ops->productnumeric) SETERRQ1(PetscObjectComm((PetscObject)mmdata->mp[i]),PETSC_ERR_PLIB,"Missing numeric op for %s",MatProductTypes[mmdata->mp[i]->product->type]);
6449     ierr = (*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]);CHKERRQ(ierr);
6450   }
6451   for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
6452     PetscInt noff = mmdata->off[i+1] - mmdata->off[i];
6453 
6454     if (mmdata->mptmp[i]) continue;
6455     if (noff) {
6456       PetscInt nown = mmdata->own[i+1] - mmdata->own[i];
6457 
6458       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],noff,mmdata->off[i],mmdata->coo_w + n_o);CHKERRQ(ierr);
6459       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],nown,mmdata->own[i],mmdata->coo_v + n_d);CHKERRQ(ierr);
6460       n_o += noff;
6461       n_d += nown;
6462     } else {
6463       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mmdata->mp[i]->data;
6464 
6465       ierr = MatSeqAIJCopySubArray(mmdata->mp[i],mm->nz,NULL,mmdata->coo_v + n_d);CHKERRQ(ierr);
6466       n_d += mm->nz;
6467     }
6468   }
6469   if (mmdata->hasoffproc) { /* off-process insertion */
6470     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
6471     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_SCALAR,mmdata->coo_w,mmdata->coo_v+n_d);CHKERRQ(ierr);
6472   }
6473   ierr = MatSetValuesCOO(C,mmdata->coo_v,INSERT_VALUES);CHKERRQ(ierr);
6474   PetscFunctionReturn(0);
6475 }
6476 
6477 /* Support for Pt * A, A * P, or Pt * A * P */
6478 #define MAX_NUMBER_INTERMEDIATE 4
6479 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
6480 {
6481   Mat_Product            *product = C->product;
6482   Mat                    A,P,mp[MAX_NUMBER_INTERMEDIATE];
6483   Mat_MPIAIJ             *a,*p;
6484   MatMatMPIAIJBACKEND    *mmdata;
6485   ISLocalToGlobalMapping P_oth_l2g = NULL;
6486   IS                     glob = NULL;
6487   const char             *prefix;
6488   char                   pprefix[256];
6489   const PetscInt         *globidx,*P_oth_idx;
6490   const PetscInt         *cmapa[MAX_NUMBER_INTERMEDIATE],*rmapa[MAX_NUMBER_INTERMEDIATE];
6491   PetscInt               cp = 0,m,n,M,N,ncoo,ncoo_d,ncoo_o,ncoo_oown,*coo_i,*coo_j,cmapt[MAX_NUMBER_INTERMEDIATE],rmapt[MAX_NUMBER_INTERMEDIATE],i,j;
6492   MatProductType         ptype;
6493   PetscBool              mptmp[MAX_NUMBER_INTERMEDIATE],hasoffproc = PETSC_FALSE,iscuda,iskokk;
6494   PetscMPIInt            size;
6495   PetscErrorCode         ierr;
6496 
6497   PetscFunctionBegin;
6498   MatCheckProduct(C,1);
6499   if (product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
6500   ptype = product->type;
6501   if (product->A->symmetric && ptype == MATPRODUCT_AtB) ptype = MATPRODUCT_AB;
6502   switch (ptype) {
6503   case MATPRODUCT_AB:
6504     A = product->A;
6505     P = product->B;
6506     m = A->rmap->n;
6507     n = P->cmap->n;
6508     M = A->rmap->N;
6509     N = P->cmap->N;
6510     break;
6511   case MATPRODUCT_AtB:
6512     P = product->A;
6513     A = product->B;
6514     m = P->cmap->n;
6515     n = A->cmap->n;
6516     M = P->cmap->N;
6517     N = A->cmap->N;
6518     hasoffproc = PETSC_TRUE;
6519     break;
6520   case MATPRODUCT_PtAP:
6521     A = product->A;
6522     P = product->B;
6523     m = P->cmap->n;
6524     n = P->cmap->n;
6525     M = P->cmap->N;
6526     N = P->cmap->N;
6527     hasoffproc = PETSC_TRUE;
6528     break;
6529   default:
6530     SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6531   }
6532   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)C),&size);CHKERRQ(ierr);
6533   if (size == 1) hasoffproc = PETSC_FALSE;
6534 
6535   /* defaults */
6536   for (i=0;i<MAX_NUMBER_INTERMEDIATE;i++) {
6537     mp[i]    = NULL;
6538     mptmp[i] = PETSC_FALSE;
6539     rmapt[i] = -1;
6540     cmapt[i] = -1;
6541     rmapa[i] = NULL;
6542     cmapa[i] = NULL;
6543   }
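  /* rmapt[i]/cmapt[i] describe how the local row/column indices of the intermediate product mp[i]
     map to global indices of C: 1 means shift by C's local row/column start, 2 means indirect
     through rmapa[i]/cmapa[i]; -1 (the default) is only kept by temporary products that are never
     inserted into C */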
6544 
6545   /* customization */
6546   ierr = PetscNew(&mmdata);CHKERRQ(ierr);
6547   mmdata->reusesym = product->api_user;
6548   if (ptype == MATPRODUCT_AB) {
6549     if (product->api_user) {
6550       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
6551       ierr = PetscOptionsBool("-matmatmult_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
6552       ierr = PetscOptionsBool("-matmatmult_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6553       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6554     } else {
6555       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
6556       ierr = PetscOptionsBool("-matproduct_ab_backend_mergeB","Merge product->B local matrices","MatMatMult",mmdata->abmerge,&mmdata->abmerge,NULL);CHKERRQ(ierr);
6557       ierr = PetscOptionsBool("-matproduct_ab_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6558       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6559     }
6560   } else if (ptype == MATPRODUCT_PtAP) {
6561     if (product->api_user) {
6562       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
6563       ierr = PetscOptionsBool("-matptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6564       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6565     } else {
6566       ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
6567       ierr = PetscOptionsBool("-matproduct_ptap_backend_pothbind","Bind P_oth to CPU","MatBindToCPU",mmdata->P_oth_bind,&mmdata->P_oth_bind,NULL);CHKERRQ(ierr);
6568       ierr = PetscOptionsEnd();CHKERRQ(ierr);
6569     }
6570   }
6571   a = (Mat_MPIAIJ*)A->data;
6572   p = (Mat_MPIAIJ*)P->data;
6573   ierr = MatSetSizes(C,m,n,M,N);CHKERRQ(ierr);
6574   ierr = PetscLayoutSetUp(C->rmap);CHKERRQ(ierr);
6575   ierr = PetscLayoutSetUp(C->cmap);CHKERRQ(ierr);
6576   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
6577   ierr = MatGetOptionsPrefix(C,&prefix);CHKERRQ(ierr);
6578   switch (ptype) {
6579   case MATPRODUCT_AB: /* A * P */
6580     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6581 
6582     if (mmdata->abmerge) { /* A_diag * P_loc and A_off * P_oth */
6583       /* P is product->B */
6584       ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6585       ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6586       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6587       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6588       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6589       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6590       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6591       mp[cp]->product->api_user = product->api_user;
6592       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6593       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6594       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6595       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6596       rmapt[cp] = 1;
6597       cmapt[cp] = 2;
6598       cmapa[cp] = globidx;
6599       mptmp[cp] = PETSC_FALSE;
6600       cp++;
6601     } else { /* A_diag * P_diag and A_diag * P_off and A_off * P_oth */
6602       ierr = MatProductCreate(a->A,p->A,NULL,&mp[cp]);CHKERRQ(ierr);
6603       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6604       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6605       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6606       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6607       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6608       mp[cp]->product->api_user = product->api_user;
6609       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6610       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6611       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6612       rmapt[cp] = 1;
6613       cmapt[cp] = 1;
6614       mptmp[cp] = PETSC_FALSE;
6615       cp++;
6616       ierr = MatProductCreate(a->A,p->B,NULL,&mp[cp]);CHKERRQ(ierr);
6617       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6618       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6619       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6620       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6621       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6622       mp[cp]->product->api_user = product->api_user;
6623       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6624       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6625       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6626       rmapt[cp] = 1;
6627       cmapt[cp] = 2;
6628       cmapa[cp] = p->garray;
6629       mptmp[cp] = PETSC_FALSE;
6630       cp++;
6631     }
6632     if (mmdata->P_oth) {
6633       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr);
6634       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6635       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
6636       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
6637       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
6638       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6639       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6640       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6641       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6642       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6643       mp[cp]->product->api_user = product->api_user;
6644       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6645       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6646       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6647       rmapt[cp] = 1;
6648       cmapt[cp] = 2;
6649       cmapa[cp] = P_oth_idx;
6650       mptmp[cp] = PETSC_FALSE;
6651       cp++;
6652     }
6653     break;
6654   case MATPRODUCT_AtB: /* (P^t * A): P_diag^t * A_loc + P_off^t * A_loc */
6655     /* A is product->B */
6656     ierr = MatMPIAIJGetLocalMatMerge(A,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6657     if (A == P) {
6658       ierr = MatProductCreate(mmdata->Bloc,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6659       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6660       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6661       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6662       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6663       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6664       mp[cp]->product->api_user = product->api_user;
6665       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6666       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6667       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6668       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6669       rmapt[cp] = 2;
6670       rmapa[cp] = globidx;
6671       cmapt[cp] = 2;
6672       cmapa[cp] = globidx;
6673       mptmp[cp] = PETSC_FALSE;
6674       cp++;
6675     } else {
6676       ierr = MatProductCreate(p->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6677       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6678       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6679       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6680       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6681       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6682       mp[cp]->product->api_user = product->api_user;
6683       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6684       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6685       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6686       ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6687       rmapt[cp] = 1;
6688       cmapt[cp] = 2;
6689       cmapa[cp] = globidx;
6690       mptmp[cp] = PETSC_FALSE;
6691       cp++;
6692       ierr = MatProductCreate(p->B,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6693       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6694       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6695       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6696       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6697       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6698       mp[cp]->product->api_user = product->api_user;
6699       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6700       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6701       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6702       rmapt[cp] = 2;
6703       rmapa[cp] = p->garray;
6704       cmapt[cp] = 2;
6705       cmapa[cp] = globidx;
6706       mptmp[cp] = PETSC_FALSE;
6707       cp++;
6708     }
6709     break;
6710   case MATPRODUCT_PtAP:
6711     ierr = MatGetBrowsOfAoCols_MPIAIJ(A,P,MAT_INITIAL_MATRIX,&mmdata->startsj_s,&mmdata->startsj_r,&mmdata->bufa,&mmdata->P_oth);CHKERRQ(ierr);
6712     /* P is product->B */
6713     ierr = MatMPIAIJGetLocalMatMerge(P,MAT_INITIAL_MATRIX,&glob,&mmdata->Bloc);CHKERRQ(ierr);
6714     ierr = MatProductCreate(a->A,mmdata->Bloc,NULL,&mp[cp]);CHKERRQ(ierr);
6715     ierr = MatProductSetType(mp[cp],MATPRODUCT_PtAP);CHKERRQ(ierr);
6716     ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6717     ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6718     ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6719     ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6720     mp[cp]->product->api_user = product->api_user;
6721     ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6722     if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6723     ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6724     ierr = ISGetIndices(glob,&globidx);CHKERRQ(ierr);
6725     rmapt[cp] = 2;
6726     rmapa[cp] = globidx;
6727     cmapt[cp] = 2;
6728     cmapa[cp] = globidx;
6729     mptmp[cp] = PETSC_FALSE;
6730     cp++;
6731     if (mmdata->P_oth) {
6732       ierr = MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth,&P_oth_l2g);CHKERRQ(ierr);
6733       ierr = ISLocalToGlobalMappingGetIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6734       ierr = MatSetType(mmdata->P_oth,((PetscObject)(a->B))->type_name);CHKERRQ(ierr);
6735       ierr = MatBindToCPU(mmdata->P_oth,mmdata->P_oth_bind);CHKERRQ(ierr);
6736       ierr = MatProductCreate(a->B,mmdata->P_oth,NULL,&mp[cp]);CHKERRQ(ierr);
6737       ierr = MatProductSetType(mp[cp],MATPRODUCT_AB);CHKERRQ(ierr);
6738       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6739       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6740       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6741       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6742       mp[cp]->product->api_user = product->api_user;
6743       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6744       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6745       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6746       mptmp[cp] = PETSC_TRUE;
6747       cp++;
6748       ierr = MatProductCreate(mmdata->Bloc,mp[1],NULL,&mp[cp]);CHKERRQ(ierr);
6749       ierr = MatProductSetType(mp[cp],MATPRODUCT_AtB);CHKERRQ(ierr);
6750       ierr = MatProductSetFill(mp[cp],product->fill);CHKERRQ(ierr);
6751       ierr = PetscSNPrintf(pprefix,sizeof(pprefix),"backend_p%D_",cp);CHKERRQ(ierr);
6752       ierr = MatSetOptionsPrefix(mp[cp],prefix);CHKERRQ(ierr);
6753       ierr = MatAppendOptionsPrefix(mp[cp],pprefix);CHKERRQ(ierr);
6754       mp[cp]->product->api_user = product->api_user;
6755       ierr = MatProductSetFromOptions(mp[cp]);CHKERRQ(ierr);
6756       if (!mp[cp]->ops->productsymbolic) SETERRQ1(PetscObjectComm((PetscObject)mp[cp]),PETSC_ERR_PLIB,"Missing symbolic op for %s",MatProductTypes[mp[cp]->product->type]);
6757       ierr = (*mp[cp]->ops->productsymbolic)(mp[cp]);CHKERRQ(ierr);
6758       rmapt[cp] = 2;
6759       rmapa[cp] = globidx;
6760       cmapt[cp] = 2;
6761       cmapa[cp] = P_oth_idx;
6762       mptmp[cp] = PETSC_FALSE;
6763       cp++;
6764     }
6765     break;
6766   default:
6767     SETERRQ1(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Not for product type %s",MatProductTypes[ptype]);
6768   }
6769   /* sanity check */
6770   if (size > 1) for (i = 0; i < cp; i++) if (rmapt[i] == 2 && !hasoffproc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected offproc map type for product %D",i);
6771 
6772   ierr = PetscMalloc1(cp,&mmdata->mp);CHKERRQ(ierr);
6773   for (i = 0; i < cp; i++) mmdata->mp[i] = mp[i];
6774   ierr = PetscMalloc1(cp,&mmdata->mptmp);CHKERRQ(ierr);
6775   for (i = 0; i < cp; i++) mmdata->mptmp[i] = mptmp[i];
6776   mmdata->cp = cp;
6777   C->product->data       = mmdata;
6778   C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
6779   C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;
6780 
6781   /* memory type */
6782   mmdata->mtype = PETSC_MEMTYPE_HOST;
6783   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iscuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");CHKERRQ(ierr);
6784   ierr = PetscObjectTypeCompareAny((PetscObject)C,&iskokk,MATSEQAIJKOKKOS,MATMPIAIJKOKKOS,"");CHKERRQ(ierr);
6785   if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
6786   // enable the line below when MatSeqAIJCopySubArray_SeqAIJKokkos is implemented
6787   //else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_DEVICE;
6788 
6789   /* prepare coo coordinates for values insertion */
6790   for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
6791     Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
6792     if (mptmp[cp]) continue;
6793     if (rmapt[cp] == 2 && hasoffproc) {
6794       const PetscInt *rmap = rmapa[cp];
6795       const PetscInt mr = mp[cp]->rmap->n;
6796       const PetscInt rs = C->rmap->rstart;
6797       const PetscInt re = C->rmap->rend;
6798       const PetscInt *ii  = mm->i;
6799       for (i = 0; i < mr; i++) {
6800         const PetscInt gr = rmap[i];
6801         const PetscInt nz = ii[i+1] - ii[i];
6802         if (gr < rs || gr >= re) ncoo_o += nz;
6803         else ncoo_oown += nz;
6804       }
6805     } else ncoo_d += mm->nz;
6806   }
6807   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->off);CHKERRQ(ierr);
6808   ierr = PetscCalloc1(mmdata->cp+1,&mmdata->own);CHKERRQ(ierr);
6809   if (hasoffproc) { /* handle offproc values insertion */
6810     PetscSF  msf;
6811     PetscInt ncoo2,*coo_i2,*coo_j2;
6812 
6813     ierr = PetscMalloc1(ncoo_o,&mmdata->off[0]);CHKERRQ(ierr);
6814     ierr = PetscMalloc1(ncoo_oown,&mmdata->own[0]);CHKERRQ(ierr);
6815     ierr = PetscMalloc2(ncoo_o,&coo_i,ncoo_o,&coo_j);CHKERRQ(ierr);
6816     for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
6817       Mat_SeqAIJ *mm = (Mat_SeqAIJ*)mp[cp]->data;
6818       PetscInt   *idxoff = mmdata->off[cp];
6819       PetscInt   *idxown = mmdata->own[cp];
6820       if (!mptmp[cp] && rmapt[cp] == 2) {
6821         const PetscInt *rmap = rmapa[cp];
6822         const PetscInt *cmap = cmapa[cp];
6823         const PetscInt *ii  = mm->i;
6824         PetscInt       *coi = coo_i + ncoo_o;
6825         PetscInt       *coj = coo_j + ncoo_o;
6826         const PetscInt mr = mp[cp]->rmap->n;
6827         const PetscInt rs = C->rmap->rstart;
6828         const PetscInt re = C->rmap->rend;
6829         const PetscInt cs = C->cmap->rstart;
6830         for (i = 0; i < mr; i++) {
6831           const PetscInt *jj = mm->j + ii[i];
6832           const PetscInt gr  = rmap[i];
6833           const PetscInt nz  = ii[i+1] - ii[i];
6834           if (gr < rs || gr >= re) {
6835             for (j = ii[i]; j < ii[i+1]; j++) {
6836               *coi++ = gr;
6837               *idxoff++ = j;
6838             }
6839             if (!cmapt[cp]) { /* already global */
6840               for (j = 0; j < nz; j++) *coj++ = jj[j];
6841             } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6842               for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
6843             } else { /* offdiag */
6844               for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
6845             }
6846             ncoo_o += nz;
6847           } else {
6848             for (j = ii[i]; j < ii[i+1]; j++) *idxown++ = j;
6849           }
6850         }
6851       }
6852       mmdata->off[cp + 1] = idxoff;
6853       mmdata->own[cp + 1] = idxown;
6854     }
6855 
6856     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
6857     ierr = PetscSFSetGraphLayout(mmdata->sf,C->rmap,ncoo_o,NULL,PETSC_OWN_POINTER,coo_i);CHKERRQ(ierr);
6858     ierr = PetscSFGetMultiSF(mmdata->sf,&msf);CHKERRQ(ierr);
6859     ierr = PetscSFGetGraph(msf,&ncoo2,NULL,NULL,NULL);CHKERRQ(ierr);
6860     ncoo = ncoo_d + ncoo_oown + ncoo2;
6861     ierr = PetscMalloc2(ncoo,&coo_i2,ncoo,&coo_j2);CHKERRQ(ierr);
6862     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6863     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_i,coo_i2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6864     ierr = PetscSFGatherBegin(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6865     ierr = PetscSFGatherEnd(mmdata->sf,MPIU_INT,coo_j,coo_j2 + ncoo_d + ncoo_oown);CHKERRQ(ierr);
6866     ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
6867     ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo_o*sizeof(PetscScalar),(void**)&mmdata->coo_w);CHKERRQ(ierr);
6868     coo_i = coo_i2;
6869     coo_j = coo_j2;
6870   } else { /* no offproc values insertion */
6871     ncoo = ncoo_d;
6872     ierr = PetscMalloc2(ncoo,&coo_i,ncoo,&coo_j);CHKERRQ(ierr);
6873 
6874     ierr = PetscSFCreate(PetscObjectComm((PetscObject)C),&mmdata->sf);CHKERRQ(ierr);
6875     ierr = PetscSFSetGraph(mmdata->sf,0,0,NULL,PETSC_OWN_POINTER,NULL,PETSC_OWN_POINTER);CHKERRQ(ierr);
6876     ierr = PetscSFSetUp(mmdata->sf);CHKERRQ(ierr);
6877   }
6878   mmdata->hasoffproc = hasoffproc;
6879 
6880   /* on-process indices */
6881   for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
6882     Mat_SeqAIJ     *mm = (Mat_SeqAIJ*)mp[cp]->data;
6883     PetscInt       *coi = coo_i + ncoo_d;
6884     PetscInt       *coj = coo_j + ncoo_d;
6885     const PetscInt *jj  = mm->j;
6886     const PetscInt *ii  = mm->i;
6887     const PetscInt *cmap = cmapa[cp];
6888     const PetscInt *rmap = rmapa[cp];
6889     const PetscInt mr = mp[cp]->rmap->n;
6890     const PetscInt rs = C->rmap->rstart;
6891     const PetscInt re = C->rmap->rend;
6892     const PetscInt cs = C->cmap->rstart;
6893 
6894     if (mptmp[cp]) continue;
6895     if (rmapt[cp] == 1) {
6896       for (i = 0; i < mr; i++) {
6897         const PetscInt gr = i + rs;
6898         for (j = ii[i]; j < ii[i+1]; j++) coi[j] = gr;
6899       }
6900       /* columns coo */
6901       if (!cmapt[cp]) {
6902         ierr = PetscArraycpy(coj,jj,mm->nz);CHKERRQ(ierr);
6903       } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6904         for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs;
6905       } else { /* offdiag */
6906         for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
6907       }
6908       ncoo_d += mm->nz;
6909     } else if (rmapt[cp] == 2) {
6910       for (i = 0; i < mr; i++) {
6911         const PetscInt *jj = mm->j + ii[i];
6912         const PetscInt gr  = rmap[i];
6913         const PetscInt nz  = ii[i+1] - ii[i];
6914         if (gr >= rs && gr < re) {
6915           for (j = ii[i]; j < ii[i+1]; j++) *coi++ = gr;
6916           if (!cmapt[cp]) { /* already global */
6917             for (j = 0; j < nz; j++) *coj++ = jj[j];
6918           } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
6919             for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
6920           } else { /* offdiag */
6921             for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
6922           }
6923           ncoo_d += nz;
6924         }
6925       }
6926     }
6927   }
6928   if (glob) {
6929     ierr = ISRestoreIndices(glob,&globidx);CHKERRQ(ierr);
6930   }
6931   ierr = ISDestroy(&glob);CHKERRQ(ierr);
6932   if (P_oth_l2g) {
6933     ierr = ISLocalToGlobalMappingRestoreIndices(P_oth_l2g,&P_oth_idx);CHKERRQ(ierr);
6934   }
6935   ierr = ISLocalToGlobalMappingDestroy(&P_oth_l2g);CHKERRQ(ierr);
6936   ierr = PetscSFMalloc(mmdata->sf,mmdata->mtype,ncoo*sizeof(PetscScalar),(void**)&mmdata->coo_v);CHKERRQ(ierr);
6937 
6938   /* preallocate with COO data */
6939   ierr = MatSetPreallocationCOO(C,ncoo,coo_i,coo_j);CHKERRQ(ierr);
6940   ierr = PetscFree2(coo_i,coo_j);CHKERRQ(ierr);
6941   PetscFunctionReturn(0);
6942 }
6943 
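/* Select the backend symbolic routine for the AB, AtB, and PtAP products; when device support is
   enabled the user can request the plain CPU path with the ..._backend_cpu options, in which case
   (or whenever the backend does not apply) we fall back to MatProductSetFromOptions_MPIAIJ() */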
6944 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
6945 {
6946   Mat_Product    *product = mat->product;
6947   PetscErrorCode ierr;
6948 #if defined(PETSC_HAVE_DEVICE)
6949   PetscBool      match = PETSC_FALSE;
6950   PetscBool      usecpu = PETSC_FALSE;
6951 #else
6952   PetscBool      match = PETSC_TRUE;
6953 #endif
6954 
6955   PetscFunctionBegin;
6956   MatCheckProduct(mat,1);
6957 #if defined(PETSC_HAVE_DEVICE)
6958   if (!product->A->boundtocpu && !product->B->boundtocpu) {
6959     ierr = PetscObjectTypeCompare((PetscObject)product->B,((PetscObject)product->A)->type_name,&match);CHKERRQ(ierr);
6960   }
6961   if (match) { /* we can always fall back to the CPU in case an operation is not performing well on the device */
6962     switch (product->type) {
6963     case MATPRODUCT_AB:
6964       if (product->api_user) {
6965         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatMatMult","Mat");CHKERRQ(ierr);
6966         ierr = PetscOptionsBool("-matmatmult_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6967         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6968       } else {
6969         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AB","Mat");CHKERRQ(ierr);
6970         ierr = PetscOptionsBool("-matproduct_ab_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6971         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6972       }
6973       break;
6974     case MATPRODUCT_AtB:
6975       if (product->api_user) {
6976         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatTransposeMatMult","Mat");CHKERRQ(ierr);
6977         ierr = PetscOptionsBool("-mattransposematmult_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6978         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6979       } else {
6980         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_AtB","Mat");CHKERRQ(ierr);
6981         ierr = PetscOptionsBool("-matproduct_atb_backend_cpu","Use CPU code","MatTransposeMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6982         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6983       }
6984       break;
6985     case MATPRODUCT_PtAP:
6986       if (product->api_user) {
6987         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatPtAP","Mat");CHKERRQ(ierr);
6988         ierr = PetscOptionsBool("-matptap_backend_cpu","Use CPU code","MatMatMult",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6989         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6990       } else {
6991         ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)mat),((PetscObject)mat)->prefix,"MatProduct_PtAP","Mat");CHKERRQ(ierr);
6992         ierr = PetscOptionsBool("-matproduct_ptap_backend_cpu","Use CPU code","MatPtAP",usecpu,&usecpu,NULL);CHKERRQ(ierr);
6993         ierr = PetscOptionsEnd();CHKERRQ(ierr);
6994       }
6995       break;
6996     default:
6997       break;
6998     }
6999     match = (PetscBool)!usecpu;
7000   }
7001 #endif
7002   if (match) {
7003     switch (product->type) {
7004     case MATPRODUCT_AB:
7005     case MATPRODUCT_AtB:
7006     case MATPRODUCT_PtAP:
7007       mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
7008       break;
7009     default:
7010       break;
7011     }
7012   }
7013   /* fallback to MPIAIJ ops */
7014   if (!mat->ops->productsymbolic) {
7015     ierr = MatProductSetFromOptions_MPIAIJ(mat);CHKERRQ(ierr);
7016   }
7017   PetscFunctionReturn(0);
7018 }
7019